diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 15:24:08 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 15:24:08 +0000 |
commit | f449f278dd3c70e479a035f50a9bb817a9b433ba (patch) | |
tree | 8ca2bfb785dda9bb4d573acdf9b42aea9cd51383 /src/contrib | |
parent | Initial commit. (diff) | |
download | knot-upstream/3.2.6.tar.xz knot-upstream/3.2.6.zip |
Adding upstream version 3.2.6.upstream/3.2.6upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
210 files changed, 84761 insertions, 0 deletions
diff --git a/src/contrib/Makefile.inc b/src/contrib/Makefile.inc new file mode 100644 index 0000000..64a0279 --- /dev/null +++ b/src/contrib/Makefile.inc @@ -0,0 +1,269 @@ +noinst_LTLIBRARIES += libcontrib.la + +libcontrib_la_CPPFLAGS = $(AM_CPPFLAGS) $(CFLAG_VISIBILITY) +libcontrib_la_LDFLAGS = $(AM_LDFLAGS) $(LDFLAG_EXCLUDE_LIBS) +libcontrib_la_LIBADD = $(pthread_LIBS) +libcontrib_LIBS = libcontrib.la +if USE_GNUTLS_MEMSET +libcontrib_la_CPPFLAGS += $(gnutls_CFLAGS) +libcontrib_LIBS += $(gnutls_LIBS) +endif USE_GNUTLS_MEMSET + +EXTRA_DIST += \ + contrib/licenses/0BSD \ + contrib/licenses/BSD-3-Clause \ + contrib/licenses/LGPL-2.0 \ + contrib/licenses/LGPL-2.1 \ + contrib/licenses/MIT \ + contrib/libbpf/LICENSE \ + contrib/libngtcp2/LICENSE \ + contrib/openbsd/LICENSE \ + contrib/ucw/LICENSE \ + contrib/url-parser/LICENSE \ + contrib/url-parser/README.md \ + contrib/dnstap/dnstap.proto + +libcontrib_la_SOURCES = \ + contrib/asan.h \ + contrib/base32hex.c \ + contrib/base32hex.h \ + contrib/base64.c \ + contrib/base64.h \ + contrib/base64url.c \ + contrib/base64url.h \ + contrib/conn_pool.c \ + contrib/conn_pool.h \ + contrib/color.h \ + contrib/ctype.h \ + contrib/files.c \ + contrib/files.h \ + contrib/getline.c \ + contrib/getline.h \ + contrib/json.c \ + contrib/json.h \ + contrib/macros.h \ + contrib/mempattern.c \ + contrib/mempattern.h \ + contrib/musl/inet_ntop.c \ + contrib/musl/inet_ntop.h \ + contrib/net.c \ + contrib/net.h \ + contrib/os.h \ + contrib/qp-trie/trie.c \ + contrib/qp-trie/trie.h \ + contrib/semaphore.c \ + contrib/semaphore.h \ + contrib/sockaddr.c \ + contrib/sockaddr.h \ + contrib/spinlock.h \ + contrib/string.c \ + contrib/string.h \ + contrib/strtonum.h \ + contrib/time.c \ + contrib/time.h \ + contrib/toeplitz.h \ + contrib/tolower.h \ + contrib/trim.h \ + contrib/wire_ctx.h \ + contrib/openbsd/siphash.c \ + contrib/openbsd/siphash.h \ + contrib/openbsd/strlcat.c \ + contrib/openbsd/strlcat.h \ + contrib/openbsd/strlcpy.c \ + contrib/openbsd/strlcpy.h \ + contrib/proxyv2/proxyv2.c \ + contrib/proxyv2/proxyv2.h \ + contrib/ucw/array-sort.h \ + contrib/ucw/binsearch.h \ + contrib/ucw/heap.c \ + contrib/ucw/heap.h \ + contrib/ucw/lists.c \ + contrib/ucw/lists.h \ + contrib/ucw/mempool.c \ + contrib/ucw/mempool.h \ + contrib/url-parser/url_parser.c \ + contrib/url-parser/url_parser.h \ + contrib/vpool/vpool.c \ + contrib/vpool/vpool.h + +if EMBEDDED_LIBBPF +noinst_LTLIBRARIES += libembbpf.la + +libembbpf_la_CPPFLAGS = $(AM_CPPFLAGS) $(CFLAG_VISIBILITY) $(embedded_libbpf_CFLAGS) +libembbpf_la_LDFLAGS = $(AM_LDFLAGS) $(LDFLAG_EXCLUDE_LIBS) +libembbpf_LIBS = libembbpf.la $(embedded_libbpf_LIBS) + +libembbpf_la_SOURCES = \ + contrib/libbpf/include/asm/barrier.h \ + contrib/libbpf/include/linux/compiler.h \ + contrib/libbpf/include/linux/err.h \ + contrib/libbpf/include/linux/filter.h \ + contrib/libbpf/include/linux/kernel.h \ + contrib/libbpf/include/linux/list.h \ + contrib/libbpf/include/linux/overflow.h \ + contrib/libbpf/include/linux/ring_buffer.h \ + contrib/libbpf/include/linux/types.h \ + contrib/libbpf/include/uapi/linux/bpf_common.h \ + contrib/libbpf/include/uapi/linux/bpf.h \ + contrib/libbpf/include/uapi/linux/btf.h \ + contrib/libbpf/include/uapi/linux/if_link.h \ + contrib/libbpf/include/uapi/linux/if_xdp.h \ + contrib/libbpf/include/uapi/linux/netlink.h \ + contrib/libbpf/bpf/bpf.c \ + contrib/libbpf/bpf/bpf.h \ + contrib/libbpf/bpf/bpf_core_read.h \ + contrib/libbpf/bpf/bpf_endian.h \ + contrib/libbpf/bpf/bpf_helper_defs.h \ + contrib/libbpf/bpf/bpf_helpers.h \ + contrib/libbpf/bpf/bpf_prog_linfo.c \ + contrib/libbpf/bpf/bpf_tracing.h \ + contrib/libbpf/bpf/btf.c \ + contrib/libbpf/bpf/btf.h \ + contrib/libbpf/bpf/btf_dump.c \ + contrib/libbpf/bpf/hashmap.c \ + contrib/libbpf/bpf/hashmap.h \ + contrib/libbpf/bpf/libbpf.c \ + contrib/libbpf/bpf/libbpf.h \ + contrib/libbpf/bpf/libbpf_errno.c \ + contrib/libbpf/bpf/libbpf_internal.h \ + contrib/libbpf/bpf/libbpf_probes.c \ + contrib/libbpf/bpf/libbpf_util.h \ + contrib/libbpf/bpf/netlink.c \ + contrib/libbpf/bpf/nlattr.c \ + contrib/libbpf/bpf/nlattr.h \ + contrib/libbpf/bpf/str_error.c \ + contrib/libbpf/bpf/str_error.h \ + contrib/libbpf/bpf/xsk.c \ + contrib/libbpf/bpf/xsk.h +endif EMBEDDED_LIBBPF + +if HAVE_LIBDNSTAP +noinst_LTLIBRARIES += libdnstap.la + +libdnstap_la_CPPFLAGS = $(AM_CPPFLAGS) $(CFLAG_VISIBILITY) $(DNSTAP_CFLAGS) +libdnstap_la_LDFLAGS = $(AM_LDFLAGS) $(LDFLAG_EXCLUDE_LIBS) +libdnstap_LIBS = libdnstap.la $(DNSTAP_LIBS) + +SUFFIXES = .proto .pb-c.c .pb-c.h + +.proto.pb-c.c: + $(AM_V_GEN)@PROTOC_C@ --c_out=. -I$(srcdir) $< + +.proto.pb-c.h: + $(AM_V_GEN)@PROTOC_C@ --c_out=. -I$(srcdir) $< + +libdnstap_la_SOURCES = \ + contrib/dnstap/convert.c \ + contrib/dnstap/convert.h \ + contrib/dnstap/dnstap.c \ + contrib/dnstap/dnstap.h \ + contrib/dnstap/message.c \ + contrib/dnstap/message.h \ + contrib/dnstap/reader.c \ + contrib/dnstap/reader.h \ + contrib/dnstap/writer.c \ + contrib/dnstap/writer.h + +nodist_libdnstap_la_SOURCES = \ + contrib/dnstap/dnstap.pb-c.c \ + contrib/dnstap/dnstap.pb-c.h + +BUILT_SOURCES += $(nodist_libdnstap_la_SOURCES) +CLEANFILES += $(nodist_libdnstap_la_SOURCES) +endif HAVE_LIBDNSTAP + +if EMBEDDED_LIBNGTCP2 +noinst_LTLIBRARIES += libembngtcp2.la + +libembngtcp2_la_CPPFLAGS = $(AM_CPPFLAGS) $(CFLAG_VISIBILITY) \ + $(embedded_libngtcp2_CFLAGS) $(gnutls_CFLAGS) +libembngtcp2_la_LDFLAGS = $(AM_LDFLAGS) $(LDFLAG_EXCLUDE_LIBS) +libembngtcp2_LIBS = libembngtcp2.la $(embedded_libngtcp2_LIBS) $(gnutls_LIBS) + +libembngtcp2_la_SOURCES = \ + contrib/libngtcp2/ngtcp2/crypto/gnutls.c \ + contrib/libngtcp2/ngtcp2/crypto/shared.c \ + contrib/libngtcp2/ngtcp2/crypto/shared.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_acktr.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_acktr.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_addr.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_addr.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_balloc.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_balloc.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr2.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr2.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_buf.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_buf.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_cc.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_cc.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_cid.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_cid.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_conn.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_conn.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_conv.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_conv.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_crypto.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_crypto.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_err.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_err.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_gaptr.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_gaptr.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_idtr.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_idtr.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_ksl.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_ksl.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_log.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_log.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_macro.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_map.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_map.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_mem.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_mem.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_net.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_objalloc.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_objalloc.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_opl.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_opl.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_path.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_path.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_pkt.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_pkt.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_pmtud.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_pmtud.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_ppe.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_ppe.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_pq.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_pq.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_pv.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_pv.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_qlog.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_qlog.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_range.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_range.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_rcvry.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_ringbuf.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_ringbuf.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_rob.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_rob.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_rst.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_rst.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_rtb.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_rtb.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_str.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_str.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_strm.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_strm.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_unreachable.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_unreachable.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_vec.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_vec.h \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_version.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_window_filter.c \ + contrib/libngtcp2/ngtcp2/lib/ngtcp2_window_filter.h \ + contrib/libngtcp2/ngtcp2/ngtcp2.h \ + contrib/libngtcp2/ngtcp2/ngtcp2_crypto.h \ + contrib/libngtcp2/ngtcp2/ngtcp2_crypto_gnutls.h \ + contrib/libngtcp2/ngtcp2/version.h +endif EMBEDDED_LIBNGTCP2 diff --git a/src/contrib/asan.h b/src/contrib/asan.h new file mode 100644 index 0000000..5feb2c1 --- /dev/null +++ b/src/contrib/asan.h @@ -0,0 +1,37 @@ +/* Copyright (C) 2015 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#pragma once + +/* + * see sanitizer/asan_interface.h in compiler-rt (LLVM) + */ +#ifndef __has_feature + #define __has_feature(feature) 0 +#endif +#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__) + void __asan_poison_memory_region(void const volatile *addr, size_t size); + void __asan_unpoison_memory_region(void const volatile *addr, size_t size); + #define ASAN_POISON_MEMORY_REGION(addr, size) \ + __asan_poison_memory_region((addr), (size)) + #define ASAN_UNPOISON_MEMORY_REGION(addr, size) \ + __asan_unpoison_memory_region((addr), (size)) +#else + #define ASAN_POISON_MEMORY_REGION(addr, size) \ + ((void)(addr), (void)(size)) + #define ASAN_UNPOISON_MEMORY_REGION(addr, size) \ + ((void)(addr), (void)(size)) +#endif diff --git a/src/contrib/base32hex.c b/src/contrib/base32hex.c new file mode 100644 index 0000000..ad216d2 --- /dev/null +++ b/src/contrib/base32hex.c @@ -0,0 +1,353 @@ +/* Copyright (C) 2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include "contrib/base32hex.h" +#include "libknot/errcode.h" + +#include <stdlib.h> +#include <stdint.h> + +/*! \brief Maximal length of binary input to Base32hex encoding. */ +#define MAX_BIN_DATA_LEN ((INT32_MAX / 8) * 5) + +/*! \brief Base32hex padding character. */ +static const uint8_t base32hex_pad = '='; +/*! \brief Base32hex alphabet. */ +static const uint8_t base32hex_enc[] = "0123456789abcdefghijklmnopqrstuv"; + +/*! \brief Indicates bad Base32hex character. */ +#define KO 255 +/*! \brief Indicates Base32hex padding character. */ +#define PD 32 + +/*! \brief Transformation and validation table for decoding Base32hex. */ +static const uint8_t base32hex_dec[256] = { + [ 0] = KO, [ 43] = KO, ['V'] = 31, [129] = KO, [172] = KO, [215] = KO, + [ 1] = KO, [ 44] = KO, ['W'] = KO, [130] = KO, [173] = KO, [216] = KO, + [ 2] = KO, [ 45] = KO, ['X'] = KO, [131] = KO, [174] = KO, [217] = KO, + [ 3] = KO, [ 46] = KO, ['Y'] = KO, [132] = KO, [175] = KO, [218] = KO, + [ 4] = KO, [ 47] = KO, ['Z'] = KO, [133] = KO, [176] = KO, [219] = KO, + [ 5] = KO, ['0'] = 0, [ 91] = KO, [134] = KO, [177] = KO, [220] = KO, + [ 6] = KO, ['1'] = 1, [ 92] = KO, [135] = KO, [178] = KO, [221] = KO, + [ 7] = KO, ['2'] = 2, [ 93] = KO, [136] = KO, [179] = KO, [222] = KO, + [ 8] = KO, ['3'] = 3, [ 94] = KO, [137] = KO, [180] = KO, [223] = KO, + [ 9] = KO, ['4'] = 4, [ 95] = KO, [138] = KO, [181] = KO, [224] = KO, + [ 10] = KO, ['5'] = 5, [ 96] = KO, [139] = KO, [182] = KO, [225] = KO, + [ 11] = KO, ['6'] = 6, ['a'] = 10, [140] = KO, [183] = KO, [226] = KO, + [ 12] = KO, ['7'] = 7, ['b'] = 11, [141] = KO, [184] = KO, [227] = KO, + [ 13] = KO, ['8'] = 8, ['c'] = 12, [142] = KO, [185] = KO, [228] = KO, + [ 14] = KO, ['9'] = 9, ['d'] = 13, [143] = KO, [186] = KO, [229] = KO, + [ 15] = KO, [ 58] = KO, ['e'] = 14, [144] = KO, [187] = KO, [230] = KO, + [ 16] = KO, [ 59] = KO, ['f'] = 15, [145] = KO, [188] = KO, [231] = KO, + [ 17] = KO, [ 60] = KO, ['g'] = 16, [146] = KO, [189] = KO, [232] = KO, + [ 18] = KO, ['='] = PD, ['h'] = 17, [147] = KO, [190] = KO, [233] = KO, + [ 19] = KO, [ 62] = KO, ['i'] = 18, [148] = KO, [191] = KO, [234] = KO, + [ 20] = KO, [ 63] = KO, ['j'] = 19, [149] = KO, [192] = KO, [235] = KO, + [ 21] = KO, [ 64] = KO, ['k'] = 20, [150] = KO, [193] = KO, [236] = KO, + [ 22] = KO, ['A'] = 10, ['l'] = 21, [151] = KO, [194] = KO, [237] = KO, + [ 23] = KO, ['B'] = 11, ['m'] = 22, [152] = KO, [195] = KO, [238] = KO, + [ 24] = KO, ['C'] = 12, ['n'] = 23, [153] = KO, [196] = KO, [239] = KO, + [ 25] = KO, ['D'] = 13, ['o'] = 24, [154] = KO, [197] = KO, [240] = KO, + [ 26] = KO, ['E'] = 14, ['p'] = 25, [155] = KO, [198] = KO, [241] = KO, + [ 27] = KO, ['F'] = 15, ['q'] = 26, [156] = KO, [199] = KO, [242] = KO, + [ 28] = KO, ['G'] = 16, ['r'] = 27, [157] = KO, [200] = KO, [243] = KO, + [ 29] = KO, ['H'] = 17, ['s'] = 28, [158] = KO, [201] = KO, [244] = KO, + [ 30] = KO, ['I'] = 18, ['t'] = 29, [159] = KO, [202] = KO, [245] = KO, + [ 31] = KO, ['J'] = 19, ['u'] = 30, [160] = KO, [203] = KO, [246] = KO, + [ 32] = KO, ['K'] = 20, ['v'] = 31, [161] = KO, [204] = KO, [247] = KO, + [ 33] = KO, ['L'] = 21, ['w'] = KO, [162] = KO, [205] = KO, [248] = KO, + [ 34] = KO, ['M'] = 22, ['x'] = KO, [163] = KO, [206] = KO, [249] = KO, + [ 35] = KO, ['N'] = 23, ['y'] = KO, [164] = KO, [207] = KO, [250] = KO, + [ 36] = KO, ['O'] = 24, ['z'] = KO, [165] = KO, [208] = KO, [251] = KO, + [ 37] = KO, ['P'] = 25, [123] = KO, [166] = KO, [209] = KO, [252] = KO, + [ 38] = KO, ['Q'] = 26, [124] = KO, [167] = KO, [210] = KO, [253] = KO, + [ 39] = KO, ['R'] = 27, [125] = KO, [168] = KO, [211] = KO, [254] = KO, + [ 40] = KO, ['S'] = 28, [126] = KO, [169] = KO, [212] = KO, [255] = KO, + [ 41] = KO, ['T'] = 29, [127] = KO, [170] = KO, [213] = KO, + [ 42] = KO, ['U'] = 30, [128] = KO, [171] = KO, [214] = KO, +}; + +int32_t knot_base32hex_encode(const uint8_t *in, + const uint32_t in_len, + uint8_t *out, + const uint32_t out_len) +{ + // Checking inputs. + if (in == NULL || out == NULL) { + return KNOT_EINVAL; + } + if (in_len > MAX_BIN_DATA_LEN || out_len < ((in_len + 4) / 5) * 8) { + return KNOT_ERANGE; + } + + uint8_t rest_len = in_len % 5; + const uint8_t *stop = in + in_len - rest_len; + uint8_t *text = out; + + // Encoding loop takes 5 bytes and creates 8 characters. + while (in < stop) { + text[0] = base32hex_enc[in[0] >> 3]; + text[1] = base32hex_enc[(in[0] & 0x07) << 2 | in[1] >> 6]; + text[2] = base32hex_enc[(in[1] & 0x3E) >> 1]; + text[3] = base32hex_enc[(in[1] & 0x01) << 4 | in[2] >> 4]; + text[4] = base32hex_enc[(in[2] & 0x0F) << 1 | in[3] >> 7]; + text[5] = base32hex_enc[(in[3] & 0x7C) >> 2]; + text[6] = base32hex_enc[(in[3] & 0x03) << 3 | in[4] >> 5]; + text[7] = base32hex_enc[in[4] & 0x1F]; + text += 8; + in += 5; + } + + // Processing of padding, if any. + switch (rest_len) { + case 4: + text[0] = base32hex_enc[in[0] >> 3]; + text[1] = base32hex_enc[(in[0] & 0x07) << 2 | in[1] >> 6]; + text[2] = base32hex_enc[(in[1] & 0x3E) >> 1]; + text[3] = base32hex_enc[(in[1] & 0x01) << 4 | in[2] >> 4]; + text[4] = base32hex_enc[(in[2] & 0x0F) << 1 | in[3] >> 7]; + text[5] = base32hex_enc[(in[3] & 0x7C) >> 2]; + text[6] = base32hex_enc[(in[3] & 0x03) << 3]; + text[7] = base32hex_pad; + text += 8; + break; + case 3: + text[0] = base32hex_enc[in[0] >> 3]; + text[1] = base32hex_enc[(in[0] & 0x07) << 2 | in[1] >> 6]; + text[2] = base32hex_enc[(in[1] & 0x3E) >> 1]; + text[3] = base32hex_enc[(in[1] & 0x01) << 4 | in[2] >> 4]; + text[4] = base32hex_enc[(in[2] & 0x0F) << 1]; + text[5] = base32hex_pad; + text[6] = base32hex_pad; + text[7] = base32hex_pad; + text += 8; + break; + case 2: + text[0] = base32hex_enc[in[0] >> 3]; + text[1] = base32hex_enc[(in[0] & 0x07) << 2 | in[1] >> 6]; + text[2] = base32hex_enc[(in[1] & 0x3E) >> 1]; + text[3] = base32hex_enc[(in[1] & 0x01) << 4]; + text[4] = base32hex_pad; + text[5] = base32hex_pad; + text[6] = base32hex_pad; + text[7] = base32hex_pad; + text += 8; + break; + case 1: + text[0] = base32hex_enc[in[0] >> 3]; + text[1] = base32hex_enc[(in[0] & 0x07) << 2]; + text[2] = base32hex_pad; + text[3] = base32hex_pad; + text[4] = base32hex_pad; + text[5] = base32hex_pad; + text[6] = base32hex_pad; + text[7] = base32hex_pad; + text += 8; + break; + } + + return (text - out); +} + +int32_t knot_base32hex_encode_alloc(const uint8_t *in, + const uint32_t in_len, + uint8_t **out) +{ + // Checking inputs. + if (out == NULL) { + return KNOT_EINVAL; + } + if (in_len > MAX_BIN_DATA_LEN) { + return KNOT_ERANGE; + } + + // Compute output buffer length. + uint32_t out_len = ((in_len + 4) / 5) * 8; + + // Allocate output buffer. + *out = malloc(out_len); + if (*out == NULL) { + return KNOT_ENOMEM; + } + + // Encode data. + int32_t ret = knot_base32hex_encode(in, in_len, *out, out_len); + if (ret < 0) { + free(*out); + *out = NULL; + } + + return ret; +} + +int32_t knot_base32hex_decode(const uint8_t *in, + const uint32_t in_len, + uint8_t *out, + const uint32_t out_len) +{ + // Checking inputs. + if (in == NULL || out == NULL) { + return KNOT_EINVAL; + } + if (in_len > INT32_MAX || out_len < ((in_len + 7) / 8) * 5) { + return KNOT_ERANGE; + } + if ((in_len % 8) != 0) { + return KNOT_BASE32HEX_ESIZE; + } + + const uint8_t *stop = in + in_len; + uint8_t *bin = out; + uint8_t pad_len = 0; + uint8_t c1, c2, c3, c4, c5, c6, c7, c8; + + // Decoding loop takes 8 characters and creates 5 bytes. + while (in < stop) { + // Filling and transforming 8 Base32hex chars. + c1 = base32hex_dec[in[0]]; + c2 = base32hex_dec[in[1]]; + c3 = base32hex_dec[in[2]]; + c4 = base32hex_dec[in[3]]; + c5 = base32hex_dec[in[4]]; + c6 = base32hex_dec[in[5]]; + c7 = base32hex_dec[in[6]]; + c8 = base32hex_dec[in[7]]; + + // Check 8. char if is bad or padding. + if (c8 >= PD) { + if (c8 == PD && pad_len == 0) { + pad_len = 1; + } else { + return KNOT_BASE32HEX_ECHAR; + } + } + + // Check 7. char if is bad or padding (if so, 6. must be too). + if (c7 >= PD) { + if (c7 == PD && c6 == PD && pad_len == 1) { + pad_len = 3; + } else { + return KNOT_BASE32HEX_ECHAR; + } + } + + // Check 6. char if is bad or padding. + if (c6 >= PD) { + if (!(c6 == PD && pad_len == 3)) { + return KNOT_BASE32HEX_ECHAR; + } + } + + // Check 5. char if is bad or padding. + if (c5 >= PD) { + if (c5 == PD && pad_len == 3) { + pad_len = 4; + } else { + return KNOT_BASE32HEX_ECHAR; + } + } + + // Check 4. char if is bad or padding (if so, 3. must be too). + if (c4 >= PD) { + if (c4 == PD && c3 == PD && pad_len == 4) { + pad_len = 6; + } else { + return KNOT_BASE32HEX_ECHAR; + } + } + + // Check 3. char if is bad or padding. + if (c3 >= PD) { + if (!(c3 == PD && pad_len == 6)) { + return KNOT_BASE32HEX_ECHAR; + } + } + + // 1. and 2. chars must not be padding. + if (c2 >= PD || c1 >= PD) { + return KNOT_BASE32HEX_ECHAR; + } + + // Computing of output data based on padding length. + switch (pad_len) { + case 0: + bin[4] = (c7 << 5) + c8; + // FALLTHROUGH + case 1: + bin[3] = (c5 << 7) + (c6 << 2) + (c7 >> 3); + // FALLTHROUGH + case 3: + bin[2] = (c4 << 4) + (c5 >> 1); + // FALLTHROUGH + case 4: + bin[1] = (c2 << 6) + (c3 << 1) + (c4 >> 4); + // FALLTHROUGH + case 6: + bin[0] = (c1 << 3) + (c2 >> 2); + } + + // Update output end. + switch (pad_len) { + case 0: + bin += 5; + break; + case 1: + bin += 4; + break; + case 3: + bin += 3; + break; + case 4: + bin += 2; + break; + case 6: + bin += 1; + break; + } + + in += 8; + } + + return (bin - out); +} + +int32_t knot_base32hex_decode_alloc(const uint8_t *in, + const uint32_t in_len, + uint8_t **out) +{ + // Checking inputs. + if (out == NULL) { + return KNOT_EINVAL; + } + + // Compute output buffer length. + uint32_t out_len = ((in_len + 7) / 8) * 5; + + // Allocate output buffer. + *out = malloc(out_len); + if (*out == NULL) { + return KNOT_ENOMEM; + } + + // Decode data. + int32_t ret = knot_base32hex_decode(in, in_len, *out, out_len); + if (ret < 0) { + free(*out); + *out = NULL; + } + + return ret; +} diff --git a/src/contrib/base32hex.h b/src/contrib/base32hex.h new file mode 100644 index 0000000..4a6fd8f --- /dev/null +++ b/src/contrib/base32hex.h @@ -0,0 +1,104 @@ +/* Copyright (C) 2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/*! + * \brief Base32hex implementation (RFC 4648). + * + * \note Input Base32hex string can contain a-v characters. These characters + * are considered as A-V equivalent. Lower-case variant is used for encoding! + */ + +#pragma once + +#include <stdint.h> + +/*! + * \brief Encodes binary data using Base32hex. + * + * \note Output data buffer contains Base32hex text string which isn't + * terminated with '\0'! + * + * \param in Input binary data. + * \param in_len Length of input data. + * \param out Output data buffer. + * \param out_len Size of output buffer. + * + * \retval >=0 length of output string. + * \retval KNOT_E* if error. + */ +int32_t knot_base32hex_encode(const uint8_t *in, + const uint32_t in_len, + uint8_t *out, + const uint32_t out_len); + +/*! + * \brief Encodes binary data using Base32hex and output stores to own buffer. + * + * \note Output data buffer contains Base32hex text string which isn't + * terminated with '\0'! + * + * \note Output buffer should be deallocated after use. + * + * \param in Input binary data. + * \param in_len Length of input data. + * \param out Output data buffer. + * + * \retval >=0 length of output string. + * \retval KNOT_E* if error. + */ +int32_t knot_base32hex_encode_alloc(const uint8_t *in, + const uint32_t in_len, + uint8_t **out); + +/*! + * \brief Decodes text data using Base32hex. + * + * \note Input data needn't be terminated with '\0'. + * + * \note Input data must be continuous Base32hex string! + * + * \param in Input text data. + * \param in_len Length of input string. + * \param out Output data buffer. + * \param out_len Size of output buffer. + * + * \retval >=0 length of output data. + * \retval KNOT_E* if error. + */ +int32_t knot_base32hex_decode(const uint8_t *in, + const uint32_t in_len, + uint8_t *out, + const uint32_t out_len); + +/*! + * \brief Decodes text data using Base32hex and output stores to own buffer. + * + * \note Input data needn't be terminated with '\0'. + * + * \note Input data must be continuous Base32hex string! + * + * \note Output buffer should be deallocated after use. + * + * \param in Input text data. + * \param in_len Length of input string. + * \param out Output data buffer. + * + * \retval >=0 length of output data. + * \retval KNOT_E* if error. + */ +int32_t knot_base32hex_decode_alloc(const uint8_t *in, + const uint32_t in_len, + uint8_t **out); diff --git a/src/contrib/base64.c b/src/contrib/base64.c new file mode 100644 index 0000000..ab4a560 --- /dev/null +++ b/src/contrib/base64.c @@ -0,0 +1,272 @@ +/* Copyright (C) 2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include "contrib/base64.h" +#include "libknot/errcode.h" + +#include <stdlib.h> +#include <stdint.h> + +/*! \brief Maximal length of binary input to Base64 encoding. */ +#define MAX_BIN_DATA_LEN ((INT32_MAX / 4) * 3) + +/*! \brief Base64 padding character. */ +static const uint8_t base64_pad = '='; +/*! \brief Base64 alphabet. */ +static const uint8_t base64_enc[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +/*! \brief Indicates bad Base64 character. */ +#define KO 255 +/*! \brief Indicates Base64 padding character. */ +#define PD 64 + +/*! \brief Transformation and validation table for decoding Base64. */ +static const uint8_t base64_dec[256] = { + [ 0] = KO, ['+'] = 62, ['V'] = 21, [129] = KO, [172] = KO, [215] = KO, + [ 1] = KO, [ 44] = KO, ['W'] = 22, [130] = KO, [173] = KO, [216] = KO, + [ 2] = KO, [ 45] = KO, ['X'] = 23, [131] = KO, [174] = KO, [217] = KO, + [ 3] = KO, [ 46] = KO, ['Y'] = 24, [132] = KO, [175] = KO, [218] = KO, + [ 4] = KO, ['/'] = 63, ['Z'] = 25, [133] = KO, [176] = KO, [219] = KO, + [ 5] = KO, ['0'] = 52, [ 91] = KO, [134] = KO, [177] = KO, [220] = KO, + [ 6] = KO, ['1'] = 53, [ 92] = KO, [135] = KO, [178] = KO, [221] = KO, + [ 7] = KO, ['2'] = 54, [ 93] = KO, [136] = KO, [179] = KO, [222] = KO, + [ 8] = KO, ['3'] = 55, [ 94] = KO, [137] = KO, [180] = KO, [223] = KO, + [ 9] = KO, ['4'] = 56, [ 95] = KO, [138] = KO, [181] = KO, [224] = KO, + [ 10] = KO, ['5'] = 57, [ 96] = KO, [139] = KO, [182] = KO, [225] = KO, + [ 11] = KO, ['6'] = 58, ['a'] = 26, [140] = KO, [183] = KO, [226] = KO, + [ 12] = KO, ['7'] = 59, ['b'] = 27, [141] = KO, [184] = KO, [227] = KO, + [ 13] = KO, ['8'] = 60, ['c'] = 28, [142] = KO, [185] = KO, [228] = KO, + [ 14] = KO, ['9'] = 61, ['d'] = 29, [143] = KO, [186] = KO, [229] = KO, + [ 15] = KO, [ 58] = KO, ['e'] = 30, [144] = KO, [187] = KO, [230] = KO, + [ 16] = KO, [ 59] = KO, ['f'] = 31, [145] = KO, [188] = KO, [231] = KO, + [ 17] = KO, [ 60] = KO, ['g'] = 32, [146] = KO, [189] = KO, [232] = KO, + [ 18] = KO, ['='] = PD, ['h'] = 33, [147] = KO, [190] = KO, [233] = KO, + [ 19] = KO, [ 62] = KO, ['i'] = 34, [148] = KO, [191] = KO, [234] = KO, + [ 20] = KO, [ 63] = KO, ['j'] = 35, [149] = KO, [192] = KO, [235] = KO, + [ 21] = KO, [ 64] = KO, ['k'] = 36, [150] = KO, [193] = KO, [236] = KO, + [ 22] = KO, ['A'] = 0, ['l'] = 37, [151] = KO, [194] = KO, [237] = KO, + [ 23] = KO, ['B'] = 1, ['m'] = 38, [152] = KO, [195] = KO, [238] = KO, + [ 24] = KO, ['C'] = 2, ['n'] = 39, [153] = KO, [196] = KO, [239] = KO, + [ 25] = KO, ['D'] = 3, ['o'] = 40, [154] = KO, [197] = KO, [240] = KO, + [ 26] = KO, ['E'] = 4, ['p'] = 41, [155] = KO, [198] = KO, [241] = KO, + [ 27] = KO, ['F'] = 5, ['q'] = 42, [156] = KO, [199] = KO, [242] = KO, + [ 28] = KO, ['G'] = 6, ['r'] = 43, [157] = KO, [200] = KO, [243] = KO, + [ 29] = KO, ['H'] = 7, ['s'] = 44, [158] = KO, [201] = KO, [244] = KO, + [ 30] = KO, ['I'] = 8, ['t'] = 45, [159] = KO, [202] = KO, [245] = KO, + [ 31] = KO, ['J'] = 9, ['u'] = 46, [160] = KO, [203] = KO, [246] = KO, + [ 32] = KO, ['K'] = 10, ['v'] = 47, [161] = KO, [204] = KO, [247] = KO, + [ 33] = KO, ['L'] = 11, ['w'] = 48, [162] = KO, [205] = KO, [248] = KO, + [ 34] = KO, ['M'] = 12, ['x'] = 49, [163] = KO, [206] = KO, [249] = KO, + [ 35] = KO, ['N'] = 13, ['y'] = 50, [164] = KO, [207] = KO, [250] = KO, + [ 36] = KO, ['O'] = 14, ['z'] = 51, [165] = KO, [208] = KO, [251] = KO, + [ 37] = KO, ['P'] = 15, [123] = KO, [166] = KO, [209] = KO, [252] = KO, + [ 38] = KO, ['Q'] = 16, [124] = KO, [167] = KO, [210] = KO, [253] = KO, + [ 39] = KO, ['R'] = 17, [125] = KO, [168] = KO, [211] = KO, [254] = KO, + [ 40] = KO, ['S'] = 18, [126] = KO, [169] = KO, [212] = KO, [255] = KO, + [ 41] = KO, ['T'] = 19, [127] = KO, [170] = KO, [213] = KO, + [ 42] = KO, ['U'] = 20, [128] = KO, [171] = KO, [214] = KO, +}; + +int32_t knot_base64_encode(const uint8_t *in, + const uint32_t in_len, + uint8_t *out, + const uint32_t out_len) +{ + // Checking inputs. + if (in == NULL || out == NULL) { + return KNOT_EINVAL; + } + if (in_len > MAX_BIN_DATA_LEN || out_len < ((in_len + 2) / 3) * 4) { + return KNOT_ERANGE; + } + + uint8_t rest_len = in_len % 3; + const uint8_t *stop = in + in_len - rest_len; + uint8_t *text = out; + + // Encoding loop takes 3 bytes and creates 4 characters. + while (in < stop) { + text[0] = base64_enc[in[0] >> 2]; + text[1] = base64_enc[(in[0] & 0x03) << 4 | in[1] >> 4]; + text[2] = base64_enc[(in[1] & 0x0F) << 2 | in[2] >> 6]; + text[3] = base64_enc[in[2] & 0x3F]; + text += 4; + in += 3; + } + + // Processing of padding, if any. + switch (rest_len) { + case 2: + text[0] = base64_enc[in[0] >> 2]; + text[1] = base64_enc[(in[0] & 0x03) << 4 | in[1] >> 4]; + text[2] = base64_enc[(in[1] & 0x0F) << 2]; + text[3] = base64_pad; + text += 4; + break; + case 1: + text[0] = base64_enc[in[0] >> 2]; + text[1] = base64_enc[(in[0] & 0x03) << 4]; + text[2] = base64_pad; + text[3] = base64_pad; + text += 4; + break; + } + + return (text - out); +} + +int32_t knot_base64_encode_alloc(const uint8_t *in, + const uint32_t in_len, + uint8_t **out) +{ + // Checking inputs. + if (out == NULL) { + return KNOT_EINVAL; + } + if (in_len > MAX_BIN_DATA_LEN) { + return KNOT_ERANGE; + } + + // Compute output buffer length. + uint32_t out_len = ((in_len + 2) / 3) * 4; + + // Allocate output buffer. + *out = malloc(out_len); + if (*out == NULL) { + return KNOT_ENOMEM; + } + + // Encode data. + int32_t ret = knot_base64_encode(in, in_len, *out, out_len); + if (ret < 0) { + free(*out); + *out = NULL; + } + + return ret; +} + +int32_t knot_base64_decode(const uint8_t *in, + const uint32_t in_len, + uint8_t *out, + const uint32_t out_len) +{ + // Checking inputs. + if (in == NULL || out == NULL) { + return KNOT_EINVAL; + } + if (in_len > INT32_MAX || out_len < ((in_len + 3) / 4) * 3) { + return KNOT_ERANGE; + } + if ((in_len % 4) != 0) { + return KNOT_BASE64_ESIZE; + } + + const uint8_t *stop = in + in_len; + uint8_t *bin = out; + uint8_t pad_len = 0; + uint8_t c1, c2, c3, c4; + + // Decoding loop takes 4 characters and creates 3 bytes. + while (in < stop) { + // Filling and transforming 4 Base64 chars. + c1 = base64_dec[in[0]]; + c2 = base64_dec[in[1]]; + c3 = base64_dec[in[2]]; + c4 = base64_dec[in[3]]; + + // Check 4. char if is bad or padding. + if (c4 >= PD) { + if (c4 == PD && pad_len == 0) { + pad_len = 1; + } else { + return KNOT_BASE64_ECHAR; + } + } + + // Check 3. char if is bad or padding. + if (c3 >= PD) { + if (c3 == PD && pad_len == 1) { + pad_len = 2; + } else { + return KNOT_BASE64_ECHAR; + } + } + + // Check 1. and 2. chars if are not padding. + if (c2 >= PD || c1 >= PD) { + return KNOT_BASE64_ECHAR; + } + + // Computing of output data based on padding length. + switch (pad_len) { + case 0: + bin[2] = (c3 << 6) + c4; + // FALLTHROUGH + case 1: + bin[1] = (c2 << 4) + (c3 >> 2); + // FALLTHROUGH + case 2: + bin[0] = (c1 << 2) + (c2 >> 4); + } + + // Update output end. + switch (pad_len) { + case 0: + bin += 3; + break; + case 1: + bin += 2; + break; + case 2: + bin += 1; + break; + } + + in += 4; + } + + return (bin - out); +} + +int32_t knot_base64_decode_alloc(const uint8_t *in, + const uint32_t in_len, + uint8_t **out) +{ + // Checking inputs. + if (out == NULL) { + return KNOT_EINVAL; + } + + // Compute output buffer length. + uint32_t out_len = ((in_len + 3) / 4) * 3; + + // Allocate output buffer. + *out = malloc(out_len); + if (*out == NULL) { + return KNOT_ENOMEM; + } + + // Decode data. + int32_t ret = knot_base64_decode(in, in_len, *out, out_len); + if (ret < 0) { + free(*out); + *out = NULL; + } + + return ret; +} diff --git a/src/contrib/base64.h b/src/contrib/base64.h new file mode 100644 index 0000000..0a98733 --- /dev/null +++ b/src/contrib/base64.h @@ -0,0 +1,103 @@ +/* Copyright (C) 2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/*! + * \brief Base64 implementation (RFC 4648). + */ + +#pragma once + +#include <stdint.h> + +/*! + * \brief Encodes binary data using Base64. + * + * \note Output data buffer contains Base64 text string which isn't + * terminated with '\0'! + * + * \param in Input binary data. + * \param in_len Length of input data. + * \param out Output data buffer. + * \param out_len Size of output buffer. + * + * \retval >=0 length of output string. + * \retval KNOT_E* if error. + */ +int32_t knot_base64_encode(const uint8_t *in, + const uint32_t in_len, + uint8_t *out, + const uint32_t out_len); + +/*! + * \brief Encodes binary data using Base64 and output stores to own buffer. + * + * \note Output data buffer contains Base64 text string which isn't + * terminated with '\0'! + * + * \note Output buffer should be deallocated after use. + * + * \param in Input binary data. + * \param in_len Length of input data. + * \param out Output data buffer. + * + * \retval >=0 length of output string. + * \retval KNOT_E* if error. + */ +int32_t knot_base64_encode_alloc(const uint8_t *in, + const uint32_t in_len, + uint8_t **out); + +/*! + * \brief Decodes text data using Base64. + * + * \note Input data needn't be terminated with '\0'. + * + * \note Input data must be continuous Base64 string! + * + * \param in Input text data. + * \param in_len Length of input string. + * \param out Output data buffer. + * \param out_len Size of output buffer. + * + * \retval >=0 length of output data. + * \retval KNOT_E* if error. + */ +int32_t knot_base64_decode(const uint8_t *in, + const uint32_t in_len, + uint8_t *out, + const uint32_t out_len); + +/*! + * \brief Decodes text data using Base64 and output stores to own buffer. + * + * \note Input data needn't be terminated with '\0'. + * + * \note Input data must be continuous Base64 string! + * + * \note Output buffer should be deallocated after use. + * + * \param in Input text data. + * \param in_len Length of input string. + * \param out Output data buffer. + * + * \retval >=0 length of output data. + * \retval KNOT_E* if error. + */ +int32_t knot_base64_decode_alloc(const uint8_t *in, + const uint32_t in_len, + uint8_t **out); + +/*! @} */ diff --git a/src/contrib/base64url.c b/src/contrib/base64url.c new file mode 100644 index 0000000..34e2bbe --- /dev/null +++ b/src/contrib/base64url.c @@ -0,0 +1,287 @@ +/* Copyright (C) 2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include "contrib/base64url.h" +#include "libknot/errcode.h" + +#include <stdlib.h> +#include <stdint.h> +#include <ctype.h> + +/*! \brief Maximal length of binary input to Base64url encoding. */ +#define MAX_BIN_DATA_LEN ((INT32_MAX / 4) * 3) + +/*! \brief Base64url padding character. */ +static const uint8_t base64url_pad = '\0'; +/*! \brief Base64 alphabet. */ +static const uint8_t base64url_enc[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; + +/*! \brief Indicates bad Base64 character. */ +#define KO 255 +/*! \brief Indicates Base64 padding character. */ +#define PD 64 + +/*! \brief Transformation and validation table for decoding Base64. */ +static const uint8_t base64url_dec[256] = { + [ 0] = PD, [ 43] = KO, ['V'] = 21, [129] = KO, [172] = KO, [215] = KO, + [ 1] = KO, [ 44] = KO, ['W'] = 22, [130] = KO, [173] = KO, [216] = KO, + [ 2] = KO, ['-'] = 62, ['X'] = 23, [131] = KO, [174] = KO, [217] = KO, + [ 3] = KO, [ 46] = KO, ['Y'] = 24, [132] = KO, [175] = KO, [218] = KO, + [ 4] = KO, [ 47] = KO, ['Z'] = 25, [133] = KO, [176] = KO, [219] = KO, + [ 5] = KO, ['0'] = 52, [ 91] = KO, [134] = KO, [177] = KO, [220] = KO, + [ 6] = KO, ['1'] = 53, [ 92] = KO, [135] = KO, [178] = KO, [221] = KO, + [ 7] = KO, ['2'] = 54, [ 93] = KO, [136] = KO, [179] = KO, [222] = KO, + [ 8] = KO, ['3'] = 55, [ 94] = KO, [137] = KO, [180] = KO, [223] = KO, + [ 9] = KO, ['4'] = 56, ['_'] = 63, [138] = KO, [181] = KO, [224] = KO, + [ 10] = KO, ['5'] = 57, [ 96] = KO, [139] = KO, [182] = KO, [225] = KO, + [ 11] = KO, ['6'] = 58, ['a'] = 26, [140] = KO, [183] = KO, [226] = KO, + [ 12] = KO, ['7'] = 59, ['b'] = 27, [141] = KO, [184] = KO, [227] = KO, + [ 13] = KO, ['8'] = 60, ['c'] = 28, [142] = KO, [185] = KO, [228] = KO, + [ 14] = KO, ['9'] = 61, ['d'] = 29, [143] = KO, [186] = KO, [229] = KO, + [ 15] = KO, [ 58] = KO, ['e'] = 30, [144] = KO, [187] = KO, [230] = KO, + [ 16] = KO, [ 59] = KO, ['f'] = 31, [145] = KO, [188] = KO, [231] = KO, + [ 17] = KO, [ 60] = KO, ['g'] = 32, [146] = KO, [189] = KO, [232] = KO, + [ 18] = KO, [ 61] = KO, ['h'] = 33, [147] = KO, [190] = KO, [233] = KO, + [ 19] = KO, [ 62] = KO, ['i'] = 34, [148] = KO, [191] = KO, [234] = KO, + [ 20] = KO, [ 63] = KO, ['j'] = 35, [149] = KO, [192] = KO, [235] = KO, + [ 21] = KO, [ 64] = KO, ['k'] = 36, [150] = KO, [193] = KO, [236] = KO, + [ 22] = KO, ['A'] = 0, ['l'] = 37, [151] = KO, [194] = KO, [237] = KO, + [ 23] = KO, ['B'] = 1, ['m'] = 38, [152] = KO, [195] = KO, [238] = KO, + [ 24] = KO, ['C'] = 2, ['n'] = 39, [153] = KO, [196] = KO, [239] = KO, + [ 25] = KO, ['D'] = 3, ['o'] = 40, [154] = KO, [197] = KO, [240] = KO, + [ 26] = KO, ['E'] = 4, ['p'] = 41, [155] = KO, [198] = KO, [241] = KO, + [ 27] = KO, ['F'] = 5, ['q'] = 42, [156] = KO, [199] = KO, [242] = KO, + [ 28] = KO, ['G'] = 6, ['r'] = 43, [157] = KO, [200] = KO, [243] = KO, + [ 29] = KO, ['H'] = 7, ['s'] = 44, [158] = KO, [201] = KO, [244] = KO, + [ 30] = KO, ['I'] = 8, ['t'] = 45, [159] = KO, [202] = KO, [245] = KO, + [ 31] = KO, ['J'] = 9, ['u'] = 46, [160] = KO, [203] = KO, [246] = KO, + [ 32] = KO, ['K'] = 10, ['v'] = 47, [161] = KO, [204] = KO, [247] = KO, + [ 33] = KO, ['L'] = 11, ['w'] = 48, [162] = KO, [205] = KO, [248] = KO, + [ 34] = KO, ['M'] = 12, ['x'] = 49, [163] = KO, [206] = KO, [249] = KO, + [ 35] = KO, ['N'] = 13, ['y'] = 50, [164] = KO, [207] = KO, [250] = KO, + [ 36] = KO, ['O'] = 14, ['z'] = 51, [165] = KO, [208] = KO, [251] = KO, + ['%'] = KO, ['P'] = 15, [123] = KO, [166] = KO, [209] = KO, [252] = KO, + [ 38] = KO, ['Q'] = 16, [124] = KO, [167] = KO, [210] = KO, [253] = KO, + [ 39] = KO, ['R'] = 17, [125] = KO, [168] = KO, [211] = KO, [254] = KO, + [ 40] = KO, ['S'] = 18, [126] = KO, [169] = KO, [212] = KO, [255] = KO, + [ 41] = KO, ['T'] = 19, [127] = KO, [170] = KO, [213] = KO, + [ 42] = KO, ['U'] = 20, [128] = KO, [171] = KO, [214] = KO, +}; + +int32_t knot_base64url_encode(const uint8_t *in, + const uint32_t in_len, + uint8_t *out, + const uint32_t out_len) +{ + // Checking inputs. + if (in == NULL || out == NULL) { + return KNOT_EINVAL; + } + if (in_len > MAX_BIN_DATA_LEN || out_len < ((in_len + 2) / 3) * 4) { + return KNOT_ERANGE; + } + + uint8_t rest_len = in_len % 3; + const uint8_t *stop = in + in_len - rest_len; + uint8_t *text = out; + + // Encoding loop takes 3 bytes and creates 4 characters. + while (in < stop) { + text[0] = base64url_enc[in[0] >> 2]; + text[1] = base64url_enc[(in[0] & 0x03) << 4 | in[1] >> 4]; + text[2] = base64url_enc[(in[1] & 0x0F) << 2 | in[2] >> 6]; + text[3] = base64url_enc[in[2] & 0x3F]; + text += 4; + in += 3; + } + + // Processing of padding, if any. + switch (rest_len) { + case 2: + text[0] = base64url_enc[in[0] >> 2]; + text[1] = base64url_enc[(in[0] & 0x03) << 4 | in[1] >> 4]; + text[2] = base64url_enc[(in[1] & 0x0F) << 2]; + text[3] = base64url_pad; + text += 3; + break; + case 1: + text[0] = base64url_enc[in[0] >> 2]; + text[1] = base64url_enc[(in[0] & 0x03) << 4]; + text[2] = base64url_pad; + text[3] = base64url_pad; + text += 2; + break; + } + return (text - out); +} + +int32_t knot_base64url_encode_alloc(const uint8_t *in, + const uint32_t in_len, + uint8_t **out) +{ + // Checking inputs. + if (out == NULL) { + return KNOT_EINVAL; + } + if (in_len > MAX_BIN_DATA_LEN) { + return KNOT_ERANGE; + } + + // Compute output buffer length. + uint32_t out_len = ((in_len + 2) / 3) * 4; + + // Allocate output buffer. + *out = malloc(out_len); + if (*out == NULL) { + return KNOT_ENOMEM; + } + + // Encode data. + int32_t ret = knot_base64url_encode(in, in_len, *out, out_len); + if (ret < 0) { + free(*out); + *out = NULL; + } + + return ret; +} + +int32_t knot_base64url_decode(const uint8_t *in, + uint32_t in_len, + uint8_t *out, + const uint32_t out_len) +{ + // Checking inputs. + if (in == NULL || out == NULL) { + return KNOT_EINVAL; + } + + // cut up to two "%3d" from the end of input + int pad3d = 0; + const uint8_t *end = in + in_len; + char *perc3d = "d3%d3%", *stop3d = perc3d + 6; + while (end != in && perc3d != stop3d && tolower(*--end) == *perc3d) { + if (*perc3d++ == '%') { + in_len -= 3; + pad3d++; + } + } + + if (in_len > INT32_MAX || out_len < ((in_len + 3) / 4) * 3) { + return KNOT_ERANGE; + } + + const uint8_t *stop = in + in_len; + uint8_t *bin = out; + uint8_t pad_len = 0; + uint8_t c1, c2, c3, c4; + + // Decoding loop takes 4 characters and creates 3 bytes. + while (in < stop) { + // Filling and transforming 4 Base64 chars. + c1 = base64url_dec[in[0]] ; + c2 = base64url_dec[in[1]] ; + c3 = (in + 2 < stop) ? base64url_dec[in[2]] : PD; + c4 = (in + 3 < stop) ? base64url_dec[in[3]] : PD; + + // Check 1. and 2. chars if are not padding + if (c1 >= PD || c2 >= PD) { + return KNOT_BASE64_ECHAR; + } + // Check 3. char if is bad or padding. + else if (c3 >= PD) { + if (c3 == PD) { + pad_len = 2; + } else { + return KNOT_BASE64_ECHAR; + } + } + // Check 3. char if is bad or padding. + else if (c4 >= PD) { + if (c4 == PD) { + pad_len = 1; + } else { + return KNOT_BASE64_ECHAR; + } + } + + if (pad_len > 0 && in <= stop - 4) { + return KNOT_BASE64_ECHAR; + } + + // Computing of output data based on padding length. + switch (pad_len) { + case 0: + bin[2] = (c3 << 6) + c4; + // FALLTHROUGH + case 1: + bin[1] = (c2 << 4) + (c3 >> 2); + // FALLTHROUGH + case 2: + bin[0] = (c1 << 2) + (c2 >> 4); + } + + // Update output end. + switch (pad_len) { + case 0: + bin += 3; + break; + case 1: + bin += 2; + goto end; + case 2: + bin += 1; + goto end; + } + + in += 4; + } + +end: + if (pad3d > pad_len) { + return KNOT_BASE64_ECHAR; + } + return (bin - out); +} + +int32_t knot_base64url_decode_alloc(const uint8_t *in, + const uint32_t in_len, + uint8_t **out) +{ + // Checking inputs. + if (out == NULL) { + return KNOT_EINVAL; + } + + // Compute output buffer length. + uint32_t out_len = ((in_len + 3) / 4) * 3; + + // Allocate output buffer. + *out = malloc(out_len); + if (*out == NULL) { + return KNOT_ENOMEM; + } + + // Decode data. + int32_t ret = knot_base64url_decode(in, in_len, *out, out_len); + if (ret < 0) { + free(*out); + *out = NULL; + } + + return ret; +} diff --git a/src/contrib/base64url.h b/src/contrib/base64url.h new file mode 100644 index 0000000..ba4bb43 --- /dev/null +++ b/src/contrib/base64url.h @@ -0,0 +1,103 @@ +/* Copyright (C) 2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/*! + * \brief Base64url implementation (RFC 4648). + */ + +#pragma once + +#include <stdint.h> + +/*! + * \brief Encodes binary data using Base64. + * + * \note Output data buffer contains Base64 text string which isn't + * terminated with '\0'! + * + * \param in Input binary data. + * \param in_len Length of input data. + * \param out Output data buffer. + * \param out_len Size of output buffer. + * + * \retval >=0 length of output string. + * \retval KNOT_E* if error. + */ +int32_t knot_base64url_encode(const uint8_t *in, + const uint32_t in_len, + uint8_t *out, + const uint32_t out_len); + +/*! + * \brief Encodes binary data using Base64 and output stores to own buffer. + * + * \note Output data buffer contains Base64 text string which isn't + * terminated with '\0'! + * + * \note Output buffer should be deallocated after use. + * + * \param in Input binary data. + * \param in_len Length of input data. + * \param out Output data buffer. + * + * \retval >=0 length of output string. + * \retval KNOT_E* if error. + */ +int32_t knot_base64url_encode_alloc(const uint8_t *in, + const uint32_t in_len, + uint8_t **out); + +/*! + * \brief Decodes text data using Base64. + * + * \note Input data needn't be terminated with '\0'. + * + * \note Input data must be continuous Base64 string! + * + * \param in Input text data. + * \param in_len Length of input string. + * \param out Output data buffer. + * \param out_len Size of output buffer. + * + * \retval >=0 length of output data. + * \retval KNOT_E* if error. + */ +int32_t knot_base64url_decode(const uint8_t *in, + uint32_t in_len, + uint8_t *out, + const uint32_t out_len); + +/*! + * \brief Decodes text data using Base64 and output stores to own buffer. + * + * \note Input data needn't be terminated with '\0'. + * + * \note Input data must be continuous Base64 string! + * + * \note Output buffer should be deallocated after use. + * + * \param in Input text data. + * \param in_len Length of input string. + * \param out Output data buffer. + * + * \retval >=0 length of output data. + * \retval KNOT_E* if error. + */ +int32_t knot_base64url_decode_alloc(const uint8_t *in, + const uint32_t in_len, + uint8_t **out); + +/*! @} */ diff --git a/src/contrib/color.h b/src/contrib/color.h new file mode 100644 index 0000000..6a7ff6a --- /dev/null +++ b/src/contrib/color.h @@ -0,0 +1,31 @@ +/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#pragma once + +#define COL_RST(active) ((active) ? "\x1B[0m" : "") + +#define COL_BOLD(active) ((active) ? "\x1B[1m" : "") +#define COL_DIM(active) ((active) ? "\x1B[2m" : "") +#define COL_UNDR(active) ((active) ? "\x1B[4m" : "") + +#define COL_RED(active) ((active) ? "\x1B[31m" : "") +#define COL_GRN(active) ((active) ? "\x1B[32m" : "") +#define COL_YELW(active) ((active) ? "\x1B[93m" : "") +#define COL_BLUE(active) ((active) ? "\x1B[34m" : "") +#define COL_MGNT(active) ((active) ? "\x1B[35m" : "") +#define COL_CYAN(active) ((active) ? "\x1B[36m" : "") +#define COL_WHIT(active) ((active) ? "\x1B[97m" : "") diff --git a/src/contrib/conn_pool.c b/src/contrib/conn_pool.c new file mode 100644 index 0000000..8367555 --- /dev/null +++ b/src/contrib/conn_pool.c @@ -0,0 +1,243 @@ +/* Copyright (C) 2022 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "contrib/conn_pool.h" + +#include "contrib/sockaddr.h" + +conn_pool_t *global_conn_pool = NULL; + +static int pool_pop(conn_pool_t *pool, size_t i); + +/*! + * \brief Try to get an open connection older than specified timestamp. + * + * \param pool Pool to search in. + * \param older_than Timestamp that the connection must be older than. + * \param next_oldest Out: the timestamp of the oldest connection (other than the returned). + * + * \return -1 if error (no such connection), >= 0 connection file descriptor. + * + * \warning The returned connection is not necessarily the oldest one. + */ +static int get_old(conn_pool_t *pool, + knot_time_t older_than, + knot_time_t *next_oldest) +{ + assert(pool); + + *next_oldest = 0; + + int fd = -1; + pthread_mutex_lock(&pool->mutex); + + for (size_t i = 0; i < pool->capacity; i++) { + knot_time_t la = pool->conns[i].last_active; + if (fd == -1 && knot_time_cmp(la, older_than) < 0) { + fd = pool_pop(pool, i); + } else if (knot_time_cmp(la, *next_oldest) < 0) { + *next_oldest = la; + } + } + + pthread_mutex_unlock(&pool->mutex); + return fd; +} + +static void *closing_thread(void *_arg) +{ + conn_pool_t *pool = _arg; + + while (true) { + knot_time_t now = knot_time(), next = 0; + knot_timediff_t timeout = conn_pool_timeout(pool, 0); + assert(timeout != 0); + + while (true) { + int old_fd = get_old(pool, now - timeout + 1, &next); + if (old_fd >= 0) { + close(old_fd); + } else { + break; + } + } + + if (next == 0) { + sleep(timeout); + } else { + sleep(next + timeout - now); + } + } + + return NULL; // we never get here since the thread will be cancelled instead +} + +conn_pool_t *conn_pool_init(size_t capacity, knot_timediff_t timeout) +{ + if (capacity == 0 || timeout == 0) { + return NULL; + } + + conn_pool_t *pool = calloc(1, sizeof(*pool) + capacity * sizeof(pool->conns[0])); + if (pool != NULL) { + pool->capacity = capacity; + pool->timeout = timeout; + if (pthread_mutex_init(&pool->mutex, 0) != 0) { + free(pool); + return NULL; + } + if (pthread_create(&pool->closing_thread, NULL, closing_thread, pool) != 0) { + pthread_mutex_destroy(&pool->mutex); + free(pool); + return NULL; + } + } + return pool; +} + +void conn_pool_deinit(conn_pool_t *pool) +{ + if (pool != NULL) { + pthread_cancel(pool->closing_thread); + pthread_join(pool->closing_thread, NULL); + + int fd; + knot_time_t unused; + while ((fd = get_old(pool, 0, &unused)) >= 0) { + close(fd); + } + + pthread_mutex_destroy(&pool->mutex); + free(pool); + } +} + +knot_timediff_t conn_pool_timeout(conn_pool_t *pool, + knot_timediff_t new_timeout) +{ + if (pool == NULL) { + return 0; + } + + pthread_mutex_lock(&pool->mutex); + + knot_timediff_t prev = pool->timeout; + if (new_timeout != 0) { + pool->timeout = new_timeout; + } + + pthread_mutex_unlock(&pool->mutex); + return prev; +} + +static int pool_pop(conn_pool_t *pool, size_t i) +{ + conn_pool_memb_t *conn = &pool->conns[i]; + assert(conn->last_active != 0); + assert(pool->usage > 0); + int fd = conn->fd; + memset(conn, 0, sizeof(*conn)); + pool->usage--; + return fd; +} + +int conn_pool_get(conn_pool_t *pool, + struct sockaddr_storage *src, + struct sockaddr_storage *dst) +{ + if (pool == NULL) { + return -1; + } + + int fd = -1; + pthread_mutex_lock(&pool->mutex); + + for (size_t i = 0; i < pool->capacity; i++) { + if (pool->conns[i].last_active != 0 && + sockaddr_cmp(&pool->conns[i].dst, dst, false) == 0 && + sockaddr_cmp(&pool->conns[i].src, src, true) == 0) { + fd = pool_pop(pool, i); + break; + } + } + + pthread_mutex_unlock(&pool->mutex); + + if (fd >= 0) { + uint8_t unused; + int peek = recv(fd, &unused, 1, MSG_PEEK | MSG_DONTWAIT); + if (peek >= 0) { // closed or pending data + close(fd); + fd = -1; + } + } + + return fd; +} + +static void pool_push(conn_pool_t *pool, size_t i, + struct sockaddr_storage *src, + struct sockaddr_storage *dst, + int fd) +{ + conn_pool_memb_t *conn = &pool->conns[i]; + assert(conn->last_active == 0); + assert(pool->usage < pool->capacity); + conn->last_active = knot_time(); + conn->fd = fd; + memcpy(&conn->src, src, sizeof(conn->src)); + memcpy(&conn->dst, dst, sizeof(conn->dst)); + pool->usage++; +} + +int conn_pool_put(conn_pool_t *pool, + struct sockaddr_storage *src, + struct sockaddr_storage *dst, + int fd) +{ + if (pool == NULL || pool->capacity == 0) { + return fd; + } + + knot_time_t oldest_time = 0; + size_t oldest_i = pool->capacity; + + pthread_mutex_lock(&pool->mutex); + + for (size_t i = 0; i < pool->capacity; i++) { + knot_time_t la = pool->conns[i].last_active; + if (la == 0) { + pool_push(pool, i, src, dst, fd); + pthread_mutex_unlock(&pool->mutex); + return -1; + } else if (knot_time_cmp(la, oldest_time) < 0) { + oldest_time = la; + oldest_i = i; + } + } + + assert(oldest_i < pool->capacity); + int oldest_fd = pool_pop(pool, oldest_i); + pool_push(pool, oldest_i, src, dst, fd); + pthread_mutex_unlock(&pool->mutex); + return oldest_fd; +} diff --git a/src/contrib/conn_pool.h b/src/contrib/conn_pool.h new file mode 100644 index 0000000..f1f2c6c --- /dev/null +++ b/src/contrib/conn_pool.h @@ -0,0 +1,100 @@ +/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#pragma once + +#include <pthread.h> +#include <stdbool.h> +#include <sys/socket.h> + +#include "contrib/time.h" + +typedef struct { + struct sockaddr_storage src; + struct sockaddr_storage dst; + int fd; + knot_time_t last_active; +} conn_pool_memb_t; + +typedef struct { + size_t capacity; + size_t usage; + knot_timediff_t timeout; + pthread_mutex_t mutex; + pthread_t closing_thread; + conn_pool_memb_t conns[]; +} conn_pool_t; + +extern conn_pool_t *global_conn_pool; + +/*! + * \brief Allocate connection pool. + * + * \param capacity Connection pool capacity (must be positive number). + * \param timeout Connection timeout (must be positive number). + * + * \return Connection pool or NULL if error. + */ +conn_pool_t *conn_pool_init(size_t capacity, knot_timediff_t timeout); + +/*! + * \brief Deallocate the pool, close all connections, terminate closing thread. + * + * \param pool Connection pool. + */ +void conn_pool_deinit(conn_pool_t *pool); + +/*! + * \brief Get and/or set connection timeout. + * + * \param pool Connection pool. + * \param new_timeout Optional: set new timeout (if positive number). + * + * \return Previous value of timeout. + */ +knot_timediff_t conn_pool_timeout(conn_pool_t *pool, + knot_timediff_t new_timeout); + +/*! + * \brief Try to get an open connection if present, check if alive. + * + * \param pool Pool to search in. + * \param src Connection source address. + * \param dst Connection destination address. + * + * \retval -1 If error (no such connection). + * \return >= 0 File descriptor of the connection. + */ +int conn_pool_get(conn_pool_t *pool, + struct sockaddr_storage *src, + struct sockaddr_storage *dst); + +/*! + * \brief Put an open connection to the pool, possibly displacing the oldest one there. + * + * \param pool Pool to insert into. + * \param src Connestion source address. + * \param dst Connection destination adress. + * \param fd Connection file descriptor. + * + * \retval -1 If connection stored to free slot. + * \retval fd If not able to store connection. + * \return >= 0 File descriptor of the displaced old connection. + */ +int conn_pool_put(conn_pool_t *pool, + struct sockaddr_storage *src, + struct sockaddr_storage *dst, + int fd); diff --git a/src/contrib/ctype.h b/src/contrib/ctype.h new file mode 100644 index 0000000..d1e9d27 --- /dev/null +++ b/src/contrib/ctype.h @@ -0,0 +1,193 @@ +/* Copyright (C) 2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/*! + * \brief Locale-independent ctype functions. + */ + +#pragma once + +#include <ctype.h> +#include <stdbool.h> +#include <stdint.h> + +enum { + CT_DIGIT = 1 << 0, + CT_UPPER = 1 << 1, + CT_LOWER = 1 << 2, + CT_XDIGT = 1 << 3, + CT_PUNCT = 1 << 4, + CT_PRINT = 1 << 5, + CT_SPACE = 1 << 6, +}; + +static const uint8_t char_mask[256] = { + // 0 - 8 + ['\t'] = CT_SPACE, + ['\n'] = CT_SPACE, + ['\v'] = CT_SPACE, + ['\f'] = CT_SPACE, + ['\r'] = CT_SPACE, + // 14 - 31 + [' '] = CT_PRINT | CT_SPACE, + + ['!'] = CT_PRINT | CT_PUNCT, + ['"'] = CT_PRINT | CT_PUNCT, + ['#'] = CT_PRINT | CT_PUNCT, + ['$'] = CT_PRINT | CT_PUNCT, + ['%'] = CT_PRINT | CT_PUNCT, + ['&'] = CT_PRINT | CT_PUNCT, + ['\''] = CT_PRINT | CT_PUNCT, + ['('] = CT_PRINT | CT_PUNCT, + [')'] = CT_PRINT | CT_PUNCT, + ['*'] = CT_PRINT | CT_PUNCT, + ['+'] = CT_PRINT | CT_PUNCT, + [','] = CT_PRINT | CT_PUNCT, + ['-'] = CT_PRINT | CT_PUNCT, + ['.'] = CT_PRINT | CT_PUNCT, + ['/'] = CT_PRINT | CT_PUNCT, + + ['0'] = CT_PRINT | CT_DIGIT | CT_XDIGT, + ['1'] = CT_PRINT | CT_DIGIT | CT_XDIGT, + ['2'] = CT_PRINT | CT_DIGIT | CT_XDIGT, + ['3'] = CT_PRINT | CT_DIGIT | CT_XDIGT, + ['4'] = CT_PRINT | CT_DIGIT | CT_XDIGT, + ['5'] = CT_PRINT | CT_DIGIT | CT_XDIGT, + ['6'] = CT_PRINT | CT_DIGIT | CT_XDIGT, + ['7'] = CT_PRINT | CT_DIGIT | CT_XDIGT, + ['8'] = CT_PRINT | CT_DIGIT | CT_XDIGT, + ['9'] = CT_PRINT | CT_DIGIT | CT_XDIGT, + + [':'] = CT_PRINT | CT_PUNCT, + [';'] = CT_PRINT | CT_PUNCT, + ['<'] = CT_PRINT | CT_PUNCT, + ['='] = CT_PRINT | CT_PUNCT, + ['>'] = CT_PRINT | CT_PUNCT, + ['?'] = CT_PRINT | CT_PUNCT, + ['@'] = CT_PRINT | CT_PUNCT, + + ['A'] = CT_PRINT | CT_UPPER | CT_XDIGT, + ['B'] = CT_PRINT | CT_UPPER | CT_XDIGT, + ['C'] = CT_PRINT | CT_UPPER | CT_XDIGT, + ['D'] = CT_PRINT | CT_UPPER | CT_XDIGT, + ['E'] = CT_PRINT | CT_UPPER | CT_XDIGT, + ['F'] = CT_PRINT | CT_UPPER | CT_XDIGT, + ['G'] = CT_PRINT | CT_UPPER, + ['H'] = CT_PRINT | CT_UPPER, + ['I'] = CT_PRINT | CT_UPPER, + ['J'] = CT_PRINT | CT_UPPER, + ['K'] = CT_PRINT | CT_UPPER, + ['L'] = CT_PRINT | CT_UPPER, + ['M'] = CT_PRINT | CT_UPPER, + ['N'] = CT_PRINT | CT_UPPER, + ['O'] = CT_PRINT | CT_UPPER, + ['P'] = CT_PRINT | CT_UPPER, + ['Q'] = CT_PRINT | CT_UPPER, + ['R'] = CT_PRINT | CT_UPPER, + ['S'] = CT_PRINT | CT_UPPER, + ['T'] = CT_PRINT | CT_UPPER, + ['U'] = CT_PRINT | CT_UPPER, + ['V'] = CT_PRINT | CT_UPPER, + ['W'] = CT_PRINT | CT_UPPER, + ['X'] = CT_PRINT | CT_UPPER, + ['Y'] = CT_PRINT | CT_UPPER, + ['Z'] = CT_PRINT | CT_UPPER, + + ['['] = CT_PRINT | CT_PUNCT, + ['\\'] = CT_PRINT | CT_PUNCT, + [']'] = CT_PRINT | CT_PUNCT, + ['^'] = CT_PRINT | CT_PUNCT, + ['_'] = CT_PRINT | CT_PUNCT, + ['`'] = CT_PRINT | CT_PUNCT, + + ['a'] = CT_PRINT | CT_LOWER | CT_XDIGT, + ['b'] = CT_PRINT | CT_LOWER | CT_XDIGT, + ['c'] = CT_PRINT | CT_LOWER | CT_XDIGT, + ['d'] = CT_PRINT | CT_LOWER | CT_XDIGT, + ['e'] = CT_PRINT | CT_LOWER | CT_XDIGT, + ['f'] = CT_PRINT | CT_LOWER | CT_XDIGT, + ['g'] = CT_PRINT | CT_LOWER, + ['h'] = CT_PRINT | CT_LOWER, + ['i'] = CT_PRINT | CT_LOWER, + ['j'] = CT_PRINT | CT_LOWER, + ['k'] = CT_PRINT | CT_LOWER, + ['l'] = CT_PRINT | CT_LOWER, + ['m'] = CT_PRINT | CT_LOWER, + ['n'] = CT_PRINT | CT_LOWER, + ['o'] = CT_PRINT | CT_LOWER, + ['p'] = CT_PRINT | CT_LOWER, + ['q'] = CT_PRINT | CT_LOWER, + ['r'] = CT_PRINT | CT_LOWER, + ['s'] = CT_PRINT | CT_LOWER, + ['t'] = CT_PRINT | CT_LOWER, + ['u'] = CT_PRINT | CT_LOWER, + ['v'] = CT_PRINT | CT_LOWER, + ['w'] = CT_PRINT | CT_LOWER, + ['x'] = CT_PRINT | CT_LOWER, + ['y'] = CT_PRINT | CT_LOWER, + ['z'] = CT_PRINT | CT_LOWER, + + ['{'] = CT_PRINT | CT_PUNCT, + ['|'] = CT_PRINT | CT_PUNCT, + ['}'] = CT_PRINT | CT_PUNCT, + ['~'] = CT_PRINT | CT_PUNCT, + // 127 - 255 +}; + +static inline bool is_alnum(uint8_t c) +{ + return char_mask[c] & (CT_DIGIT | CT_UPPER | CT_LOWER); +} + +static inline bool is_alpha(uint8_t c) +{ + return char_mask[c] & (CT_UPPER | CT_LOWER); +} + +static inline bool is_digit(uint8_t c) +{ + return char_mask[c] & CT_DIGIT; +} + +static inline bool is_xdigit(uint8_t c) +{ + return char_mask[c] & CT_XDIGT; +} + +static inline bool is_lower(uint8_t c) +{ + return char_mask[c] & CT_LOWER; +} + +static inline bool is_upper(uint8_t c) +{ + return char_mask[c] & CT_UPPER; +} + +static inline bool is_print(uint8_t c) +{ + return char_mask[c] & CT_PRINT; +} + +static inline bool is_punct(uint8_t c) +{ + return char_mask[c] & CT_PUNCT; +} + +static inline bool is_space(uint8_t c) +{ + return char_mask[c] & CT_SPACE; +} diff --git a/src/contrib/dnstap/convert.c b/src/contrib/dnstap/convert.c new file mode 100644 index 0000000..93ee8e9 --- /dev/null +++ b/src/contrib/dnstap/convert.c @@ -0,0 +1,142 @@ +/* Copyright (C) 2016 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <sys/socket.h> +#include <sys/types.h> +#include <netinet/in.h> +#include <netinet/ip.h> + +#include "contrib/dnstap/convert.h" +#include "contrib/dnstap/dnstap.pb-c.h" + +/*! + * \brief Translation between real and Dnstap value. + */ +typedef struct mapping { + int real; + int dnstap; +} mapping_t; + +/*! + * \brief Mapping for network family. + */ +static const mapping_t SOCKET_FAMILY_MAPPING[] = { + { AF_INET, DNSTAP__SOCKET_FAMILY__INET }, + { AF_INET6, DNSTAP__SOCKET_FAMILY__INET6 }, + { 0 } +}; + +/*! + * \brief Mapping from network protocol. + */ +static const mapping_t SOCKET_PROTOCOL_MAPPING[] = { + { IPPROTO_UDP, DNSTAP__SOCKET_PROTOCOL__UDP }, + { IPPROTO_TCP, DNSTAP__SOCKET_PROTOCOL__TCP }, + { 0 } +}; + +/*! + * \brief Get Dnstap value for a given real value. + */ +static int encode(const mapping_t *mapping, int real) +{ + for (const mapping_t *m = mapping; m->real != 0; m += 1) { + if (m->real == real) { + return m->dnstap; + } + } + + return 0; +} + +/*! + * \brief Get real value for a given Dnstap value. + */ +static int decode(const mapping_t *mapping, int dnstap) +{ + for (const mapping_t *m = mapping; m->real != 0; m += 1) { + if (m->dnstap == dnstap) { + return m->real; + } + } + + return 0; +} + +/* -- public API ----------------------------------------------------------- */ + +Dnstap__SocketFamily dt_family_encode(int family) +{ + return encode(SOCKET_FAMILY_MAPPING, family); +} + +int dt_family_decode(Dnstap__SocketFamily dnstap_family) +{ + return decode(SOCKET_FAMILY_MAPPING, dnstap_family); +} + +Dnstap__SocketProtocol dt_protocol_encode(int protocol) +{ + return encode(SOCKET_PROTOCOL_MAPPING, protocol); +} + +int dt_protocol_decode(Dnstap__SocketProtocol dnstap_protocol) +{ + return decode(SOCKET_PROTOCOL_MAPPING, dnstap_protocol); +} + +bool dt_message_type_is_query(Dnstap__Message__Type type) +{ + switch (type) { + case DNSTAP__MESSAGE__TYPE__AUTH_QUERY: + case DNSTAP__MESSAGE__TYPE__CLIENT_QUERY: + case DNSTAP__MESSAGE__TYPE__FORWARDER_QUERY: + case DNSTAP__MESSAGE__TYPE__RESOLVER_QUERY: + case DNSTAP__MESSAGE__TYPE__STUB_QUERY: + case DNSTAP__MESSAGE__TYPE__TOOL_QUERY: + return true; + default: + return false; + } +} + +bool dt_message_type_is_response(Dnstap__Message__Type type) +{ + switch (type) { + case DNSTAP__MESSAGE__TYPE__AUTH_RESPONSE: + case DNSTAP__MESSAGE__TYPE__CLIENT_RESPONSE: + case DNSTAP__MESSAGE__TYPE__FORWARDER_RESPONSE: + case DNSTAP__MESSAGE__TYPE__RESOLVER_RESPONSE: + case DNSTAP__MESSAGE__TYPE__STUB_RESPONSE: + case DNSTAP__MESSAGE__TYPE__TOOL_RESPONSE: + return true; + default: + return false; + } +} + +bool dt_message_role_is_initiator(Dnstap__Message__Type type) +{ + switch (type) { + case DNSTAP__MESSAGE__TYPE__AUTH_QUERY: + case DNSTAP__MESSAGE__TYPE__AUTH_RESPONSE: + case DNSTAP__MESSAGE__TYPE__CLIENT_QUERY: + case DNSTAP__MESSAGE__TYPE__CLIENT_RESPONSE: + return false; + default: + return true; + } +} diff --git a/src/contrib/dnstap/convert.h b/src/contrib/dnstap/convert.h new file mode 100644 index 0000000..0e2a86a --- /dev/null +++ b/src/contrib/dnstap/convert.h @@ -0,0 +1,60 @@ +/* Copyright (C) 2018 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/*! + * \brief Dnstap identifiers conversions. + */ + +#pragma once + +#include <stdbool.h> + +#include "contrib/dnstap/dnstap.pb-c.h" + +/*! + * \brief Get Dnstap socket family from the real one. + */ +Dnstap__SocketFamily dt_family_encode(int family); + +/*! + * \brief Get real socket family from the Dnstap one. + */ +int dt_family_decode(Dnstap__SocketFamily dnstap_family); + +/*! + * \brief Get Dnstap protocol from a real one. + */ +Dnstap__SocketProtocol dt_protocol_encode(int protocol); + +/*! + * \brief Get real protocol from the Dnstap one. + */ +int dt_protocol_decode(Dnstap__SocketProtocol dnstap_protocol); + +/*! + * Check if a message type is any type of a query. + */ +bool dt_message_type_is_query(Dnstap__Message__Type type); + +/*! + * Check if a message type is any type of a response. + */ +bool dt_message_type_is_response(Dnstap__Message__Type type); + +/*! + * Check if a message role is any type of an initiator. + */ +bool dt_message_role_is_initiator(Dnstap__Message__Type type); diff --git a/src/contrib/dnstap/dnstap.c b/src/contrib/dnstap/dnstap.c new file mode 100644 index 0000000..1516b4d --- /dev/null +++ b/src/contrib/dnstap/dnstap.c @@ -0,0 +1,41 @@ +/* Copyright (C) 2014 Farsight Security, Inc. <software@farsightsecurity.com> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <stdint.h> +#include <stdlib.h> + +#include "contrib/dnstap/dnstap.h" +#include "contrib/dnstap/dnstap.pb-c.h" + +#define DNSTAP_INITIAL_BUF_SIZE 256 + +uint8_t* dt_pack(const Dnstap__Dnstap *d, uint8_t **buf, size_t *sz) +{ + ProtobufCBufferSimple sbuf = { { NULL } }; + + sbuf.base.append = protobuf_c_buffer_simple_append; + sbuf.len = 0; + sbuf.alloced = DNSTAP_INITIAL_BUF_SIZE; + sbuf.data = malloc(sbuf.alloced); + if (sbuf.data == NULL) { + return NULL; + } + sbuf.must_free_data = 1; + + *sz = dnstap__dnstap__pack_to_buffer(d, (ProtobufCBuffer *) &sbuf); + *buf = sbuf.data; + return *buf; +} diff --git a/src/contrib/dnstap/dnstap.h b/src/contrib/dnstap/dnstap.h new file mode 100644 index 0000000..f7aecbc --- /dev/null +++ b/src/contrib/dnstap/dnstap.h @@ -0,0 +1,47 @@ +/* Copyright (C) 2014 Farsight Security, Inc. <software@farsightsecurity.com> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/*! + * \author Robert Edmonds <edmonds@fsi.io> + * + * \brief Public interface for dnstap. + */ + +#pragma once + +#include <stddef.h> +#include <stdint.h> + +#include "contrib/dnstap/dnstap.pb-c.h" + +/*! \brief Frame Streams "Content Type" value for dnstap. */ +#define DNSTAP_CONTENT_TYPE "protobuf:dnstap.Dnstap" + +/*! + * \brief Serializes a filled out dnstap protobuf struct. Dynamically allocates + * storage for the serialized frame. + * + * \note This function returns a copy of its parameter return value 'buf' to + * make error checking slightly easier. + * + * \param d dnstap protobuf struct. + * \param[out] buf Serialized frame. + * \param[out] sz Size in bytes of the serialized frame. + * + * \return Serialized frame. + * \retval NULL if error. + */ +uint8_t* dt_pack(const Dnstap__Dnstap *d, uint8_t **buf, size_t *sz); diff --git a/src/contrib/dnstap/dnstap.proto b/src/contrib/dnstap/dnstap.proto new file mode 100644 index 0000000..ea5c77a --- /dev/null +++ b/src/contrib/dnstap/dnstap.proto @@ -0,0 +1,270 @@ +// dnstap: flexible, structured event replication format for DNS software +// +// This file contains the protobuf schemas for the "dnstap" structured event +// replication format for DNS software. + +// Written in 2013-2014 by Farsight Security, Inc. +// +// To the extent possible under law, the author(s) have dedicated all +// copyright and related and neighboring rights to this file to the public +// domain worldwide. This file is distributed without any warranty. +// +// You should have received a copy of the CC0 Public Domain Dedication along +// with this file. If not, see: +// +// <http://creativecommons.org/publicdomain/zero/1.0/>. + +syntax = "proto2"; + +package dnstap; + +// "Dnstap": this is the top-level dnstap type, which is a "union" type that +// contains other kinds of dnstap payloads, although currently only one type +// of dnstap payload is defined. +// See: https://developers.google.com/protocol-buffers/docs/techniques#union +message Dnstap { + // DNS server identity. + // If enabled, this is the identity string of the DNS server which generated + // this message. Typically this would be the same string as returned by an + // "NSID" (RFC 5001) query. + optional bytes identity = 1; + + // DNS server version. + // If enabled, this is the version string of the DNS server which generated + // this message. Typically this would be the same string as returned by a + // "version.bind" query. + optional bytes version = 2; + + // Extra data for this payload. + // This field can be used for adding an arbitrary byte-string annotation to + // the payload. No encoding or interpretation is applied or enforced. + optional bytes extra = 3; + + // Identifies which field below is filled in. + enum Type { + MESSAGE = 1; + } + required Type type = 15; + + // One of the following will be filled in. + optional Message message = 14; +} + +// SocketFamily: the network protocol family of a socket. This specifies how +// to interpret "network address" fields. +enum SocketFamily { + INET = 1; // IPv4 (RFC 791) + INET6 = 2; // IPv6 (RFC 2460) +} + +// SocketProtocol: the transport protocol of a socket. This specifies how to +// interpret "transport port" fields. +enum SocketProtocol { + UDP = 1; // User Datagram Protocol (RFC 768) + TCP = 2; // Transmission Control Protocol (RFC 793) +} + +// Message: a wire-format (RFC 1035 section 4) DNS message and associated +// metadata. Applications generating "Message" payloads should follow +// certain requirements based on the MessageType, see below. +message Message { + + // There are eight types of "Message" defined that correspond to the + // four arrows in the following diagram, slightly modified from RFC 1035 + // section 2: + + // +---------+ +----------+ +--------+ + // | | query | | query | | + // | Stub |-SQ--------CQ->| Recursive|-RQ----AQ->| Auth. | + // | Resolver| | Server | | Name | + // | |<-SR--------CR-| |<-RR----AR-| Server | + // +---------+ response | | response | | + // +----------+ +--------+ + + // Each arrow has two Type values each, one for each "end" of each arrow, + // because these are considered to be distinct events. Each end of each + // arrow on the diagram above has been marked with a two-letter Type + // mnemonic. Clockwise from upper left, these mnemonic values are: + // + // SQ: STUB_QUERY + // CQ: CLIENT_QUERY + // RQ: RESOLVER_QUERY + // AQ: AUTH_QUERY + // AR: AUTH_RESPONSE + // RR: RESOLVER_RESPONSE + // CR: CLIENT_RESPONSE + // SR: STUB_RESPONSE + + // Two additional types of "Message" have been defined for the + // "forwarding" case where an upstream DNS server is responsible for + // further recursion. These are not shown on the diagram above, but have + // the following mnemonic values: + + // FQ: FORWARDER_QUERY + // FR: FORWARDER_RESPONSE + + // The "Message" Type values are defined below. + + enum Type { + // AUTH_QUERY is a DNS query message received from a resolver by an + // authoritative name server, from the perspective of the authoritative + // name server. + AUTH_QUERY = 1; + + // AUTH_RESPONSE is a DNS response message sent from an authoritative + // name server to a resolver, from the perspective of the authoritative + // name server. + AUTH_RESPONSE = 2; + + // RESOLVER_QUERY is a DNS query message sent from a resolver to an + // authoritative name server, from the perspective of the resolver. + // Resolvers typically clear the RD (recursion desired) bit when + // sending queries. + RESOLVER_QUERY = 3; + + // RESOLVER_RESPONSE is a DNS response message received from an + // authoritative name server by a resolver, from the perspective of + // the resolver. + RESOLVER_RESPONSE = 4; + + // CLIENT_QUERY is a DNS query message sent from a client to a DNS + // server which is expected to perform further recursion, from the + // perspective of the DNS server. The client may be a stub resolver or + // forwarder or some other type of software which typically sets the RD + // (recursion desired) bit when querying the DNS server. The DNS server + // may be a simple forwarding proxy or it may be a full recursive + // resolver. + CLIENT_QUERY = 5; + + // CLIENT_RESPONSE is a DNS response message sent from a DNS server to + // a client, from the perspective of the DNS server. The DNS server + // typically sets the RA (recursion available) bit when responding. + CLIENT_RESPONSE = 6; + + // FORWARDER_QUERY is a DNS query message sent from a downstream DNS + // server to an upstream DNS server which is expected to perform + // further recursion, from the perspective of the downstream DNS + // server. + FORWARDER_QUERY = 7; + + // FORWARDER_RESPONSE is a DNS response message sent from an upstream + // DNS server performing recursion to a downstream DNS server, from the + // perspective of the downstream DNS server. + FORWARDER_RESPONSE = 8; + + // STUB_QUERY is a DNS query message sent from a stub resolver to a DNS + // server, from the perspective of the stub resolver. + STUB_QUERY = 9; + + // STUB_RESPONSE is a DNS response message sent from a DNS server to a + // stub resolver, from the perspective of the stub resolver. + STUB_RESPONSE = 10; + + // TOOL_QUERY is a DNS query message sent from a DNS software tool to a + // DNS server, from the perspective of the tool. + TOOL_QUERY = 11; + + // TOOL_RESPONSE is a DNS response message received by a DNS software + // tool from a DNS server, from the perspective of the tool. + TOOL_RESPONSE = 12; + } + + // One of the Type values described above. + required Type type = 1; + + // One of the SocketFamily values described above. + optional SocketFamily socket_family = 2; + + // One of the SocketProtocol values described above. + optional SocketProtocol socket_protocol = 3; + + // The network address of the message initiator. + // For SocketFamily INET, this field is 4 octets (IPv4 address). + // For SocketFamily INET6, this field is 16 octets (IPv6 address). + optional bytes query_address = 4; + + // The network address of the message responder. + // For SocketFamily INET, this field is 4 octets (IPv4 address). + // For SocketFamily INET6, this field is 16 octets (IPv6 address). + optional bytes response_address = 5; + + // The transport port of the message initiator. + // This is a 16-bit UDP or TCP port number, depending on SocketProtocol. + optional uint32 query_port = 6; + + // The transport port of the message responder. + // This is a 16-bit UDP or TCP port number, depending on SocketProtocol. + optional uint32 response_port = 7; + + // The time at which the DNS query message was sent or received, depending + // on whether this is an AUTH_QUERY, RESOLVER_QUERY, or CLIENT_QUERY. + // This is the number of seconds since the UNIX epoch. + optional uint64 query_time_sec = 8; + + // The time at which the DNS query message was sent or received. + // This is the seconds fraction, expressed as a count of nanoseconds. + optional fixed32 query_time_nsec = 9; + + // The initiator's original wire-format DNS query message, verbatim. + optional bytes query_message = 10; + + // The "zone" or "bailiwick" pertaining to the DNS query message. + // This is a wire-format DNS domain name. + optional bytes query_zone = 11; + + // The time at which the DNS response message was sent or received, + // depending on whether this is an AUTH_RESPONSE, RESOLVER_RESPONSE, or + // CLIENT_RESPONSE. + // This is the number of seconds since the UNIX epoch. + optional uint64 response_time_sec = 12; + + // The time at which the DNS response message was sent or received. + // This is the seconds fraction, expressed as a count of nanoseconds. + optional fixed32 response_time_nsec = 13; + + // The responder's original wire-format DNS response message, verbatim. + optional bytes response_message = 14; +} + +// All fields except for 'type' in the Message schema are optional. +// It is recommended that at least the following fields be filled in for +// particular types of Messages. + +// AUTH_QUERY: +// socket_family, socket_protocol +// query_address, query_port +// query_message +// query_time_sec, query_time_nsec + +// AUTH_RESPONSE: +// socket_family, socket_protocol +// query_address, query_port +// query_time_sec, query_time_nsec +// response_message +// response_time_sec, response_time_nsec + +// RESOLVER_QUERY: +// socket_family, socket_protocol +// query_message +// query_time_sec, query_time_nsec +// query_zone +// response_address, response_port + +// RESOLVER_RESPONSE: +// socket_family, socket_protocol +// query_time_sec, query_time_nsec +// query_zone +// response_address, response_port +// response_message +// response_time_sec, response_time_nsec + +// CLIENT_QUERY: +// socket_family, socket_protocol +// query_message +// query_time_sec, query_time_nsec + +// CLIENT_RESPONSE: +// socket_family, socket_protocol +// query_time_sec, query_time_nsec +// response_message +// response_time_sec, response_time_nsec diff --git a/src/contrib/dnstap/message.c b/src/contrib/dnstap/message.c new file mode 100644 index 0000000..a5f798e --- /dev/null +++ b/src/contrib/dnstap/message.c @@ -0,0 +1,130 @@ +/* Copyright (C) 2017 Farsight Security, Inc. <software@farsightsecurity.com> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <netinet/in.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "libknot/errcode.h" + +#include "contrib/dnstap/convert.h" +#include "contrib/dnstap/message.h" + +static void set_address(const struct sockaddr *sockaddr, + ProtobufCBinaryData *addr, + protobuf_c_boolean *has_addr, + uint32_t *port, + protobuf_c_boolean *has_port) +{ + if (sockaddr == NULL) { + *has_addr = 0; + *has_port = 0; + return; + } + + *has_addr = 1; + *has_port = 1; + + if (sockaddr->sa_family == AF_INET) { + const struct sockaddr_in *sai; + sai = (const struct sockaddr_in *)sockaddr; + addr->len = sizeof(sai->sin_addr); + addr->data = (uint8_t *)&sai->sin_addr.s_addr; + *port = ntohs(sai->sin_port); + } else if (sockaddr->sa_family == AF_INET6) { + const struct sockaddr_in6 *sai6; + sai6 = (const struct sockaddr_in6 *)sockaddr; + addr->len = sizeof(sai6->sin6_addr); + addr->data = (uint8_t *)&sai6->sin6_addr.s6_addr; + *port = ntohs(sai6->sin6_port); + } +} + +static int get_family(const struct sockaddr *query_sa, + const struct sockaddr *response_sa) +{ + const struct sockaddr *source = query_sa ? query_sa : response_sa; + if (source == NULL) { + return 0; + } + + return dt_family_encode(source->sa_family); +} + +int dt_message_fill(Dnstap__Message *m, + const Dnstap__Message__Type type, + const struct sockaddr *query_sa, + const struct sockaddr *response_sa, + const int protocol, + const void *wire, + const size_t len_wire, + const struct timespec *mtime) +{ + if (m == NULL) { + return KNOT_EINVAL; + } + + memset(m, 0, sizeof(*m)); + + m->base.descriptor = &dnstap__message__descriptor; + + // Message.type + m->type = type; + + // Message.socket_family + m->socket_family = get_family(query_sa, response_sa); + m->has_socket_family = m->socket_family != 0; + + // Message.socket_protocol + m->socket_protocol = dt_protocol_encode(protocol); + m->has_socket_protocol = m->socket_protocol != 0; + + // Message addresses + set_address(query_sa, &m->query_address, &m->has_query_address, + &m->query_port, &m->has_query_port); + set_address(response_sa, &m->response_address, &m->has_response_address, + &m->response_port, &m->has_response_port); + + if (dt_message_type_is_query(type)) { + // Message.query_message + m->query_message.len = len_wire; + m->query_message.data = (uint8_t *)wire; + m->has_query_message = 1; + // Message.query_time_sec, Message.query_time_nsec + if (mtime != NULL) { + m->query_time_sec = mtime->tv_sec; + m->query_time_nsec = mtime->tv_nsec; + m->has_query_time_sec = 1; + m->has_query_time_nsec = 1; + } + } else if (dt_message_type_is_response(type)) { + // Message.response_message + m->response_message.len = len_wire; + m->response_message.data = (uint8_t *)wire; + m->has_response_message = 1; + // Message.response_time_sec, Message.response_time_nsec + if (mtime != NULL) { + m->response_time_sec = mtime->tv_sec; + m->response_time_nsec = mtime->tv_nsec; + m->has_response_time_sec = 1; + m->has_response_time_nsec = 1; + } + } + + return KNOT_EOK; +} diff --git a/src/contrib/dnstap/message.h b/src/contrib/dnstap/message.h new file mode 100644 index 0000000..b9e3aff --- /dev/null +++ b/src/contrib/dnstap/message.h @@ -0,0 +1,63 @@ +/* Copyright (C) 2017 Farsight Security, Inc. <software@farsightsecurity.com> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/*! + * \author Robert Edmonds <edmonds@fsi.io> + * + * \brief Dnstap message interface. + */ + +#pragma once + +#include <sys/socket.h> +#include <sys/time.h> +#include <stddef.h> + +#include "contrib/dnstap/dnstap.pb-c.h" + +/*! + * \brief Fill a Dnstap__Message structure with the given parameters. + * + * \param[out] m + * Dnstap__Message structure to fill. Will be zeroed first. + * \param type + * One of the DNSTAP__MESSAGE__TYPE__* values. + * \param query_sa + * sockaddr_in or sockaddr_in6 to use when filling the 'socket_family', + * 'query_address', 'query_port' fields. + * \param response_sa + * sockaddr_in or sockaddr_in6 to use when filling the 'socket_family', + * 'response_address', 'response_port' fields. + * \param protocol + * \c IPPROTO_UDP or \c IPPROTO_TCP. + * \param wire + * Wire-format query message or response message (depending on 'type'). + * \param len_wire + * Length in bytes of 'wire'. + * \param mtime + * Message time. May be NULL. + * + * \retval KNOT_EOK + * \retval KNOT_EINVAL + */ +int dt_message_fill(Dnstap__Message *m, + const Dnstap__Message__Type type, + const struct sockaddr *query_sa, + const struct sockaddr *response_sa, + const int protocol, + const void *wire, + const size_t len_wire, + const struct timespec *mtime); diff --git a/src/contrib/dnstap/reader.c b/src/contrib/dnstap/reader.c new file mode 100644 index 0000000..dc1c053 --- /dev/null +++ b/src/contrib/dnstap/reader.c @@ -0,0 +1,103 @@ +/* Copyright (C) 2014 Farsight Security, Inc. <software@farsightsecurity.com> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "libknot/attribute.h" +#include "libknot/errcode.h" + +#include "contrib/dnstap/dnstap.h" +#include "contrib/dnstap/reader.h" + +dt_reader_t* dt_reader_create(const char *file_path) +{ + struct fstrm_file_options *fopt = NULL; + struct fstrm_reader_options *ropt = NULL; + dt_reader_t *reader = NULL; + fstrm_res res; + + reader = calloc(1, sizeof(dt_reader_t)); + if (reader == NULL) { + goto fail; + } + + // Open reader. + fopt = fstrm_file_options_init(); + fstrm_file_options_set_file_path(fopt, file_path); + ropt = fstrm_reader_options_init(); + fstrm_reader_options_add_content_type(ropt, + (const uint8_t *) DNSTAP_CONTENT_TYPE, + strlen(DNSTAP_CONTENT_TYPE)); + reader->fr = fstrm_file_reader_init(fopt, ropt); + fstrm_file_options_destroy(&fopt); + fstrm_reader_options_destroy(&ropt); + if (reader->fr == NULL) { + goto fail; + } + res = fstrm_reader_open(reader->fr); + if (res != fstrm_res_success) { + goto fail; + } + + return reader; +fail: + dt_reader_free(reader); + return NULL; +} + +void dt_reader_free(dt_reader_t *reader) +{ + if (reader == NULL) { + return; + } + + fstrm_reader_destroy(&reader->fr); + free(reader); +} + +int dt_reader_read(dt_reader_t *reader, Dnstap__Dnstap **d) +{ + fstrm_res res; + const uint8_t *data = NULL; + size_t len = 0; + + res = fstrm_reader_read(reader->fr, &data, &len); + if (res == fstrm_res_success) { + *d = dnstap__dnstap__unpack(NULL, len, data); + if (*d == NULL) { + return KNOT_ENOMEM; + } + } else if (res == fstrm_res_failure) { + return KNOT_ERROR; + } else if (res == fstrm_res_stop) { + return KNOT_EOF; + } + + return KNOT_EOK; +} + +void dt_reader_free_frame(_unused_ dt_reader_t *reader, Dnstap__Dnstap **frame_ptr) +{ + if (!*frame_ptr) { + return; + } + + dnstap__dnstap__free_unpacked(*frame_ptr, NULL); + *frame_ptr = NULL; +} diff --git a/src/contrib/dnstap/reader.h b/src/contrib/dnstap/reader.h new file mode 100644 index 0000000..cc6385f --- /dev/null +++ b/src/contrib/dnstap/reader.h @@ -0,0 +1,73 @@ +/* Copyright (C) 2017 Farsight Security, Inc. <software@farsightsecurity.com> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/*! + * \brief Dnstap file reader. + */ + +#pragma once + +#include <fstrm.h> +#include <protobuf-c/protobuf-c.h> + +#include "contrib/dnstap/dnstap.pb-c.h" + +/*! \brief Structure for dnstap file reader. */ +typedef struct { + /*!< Input reader. */ + struct fstrm_reader *fr; +} dt_reader_t; + +/*! + * \brief Creates dnstap file reader structure. + * + * \param file_path Name of file to read input from. + * + * \retval reader if success. + * \retval NULL if error. + */ +dt_reader_t* dt_reader_create(const char *file_path); + +/*! + * \brief Close dnstap file reader. + * + * \param reader dnstap file reader structure. + */ +void dt_reader_free(dt_reader_t *reader); + +/*! + * \brief Read a dnstap protobuf from a dnstap file reader. + * + * Caller must deallocate the returned protobuf with the + * dnstap__dnstap__free_unpacked() function. + * + * \param[in] reader dnstap file reader structure. + * \param[out] d Unpacked dnstap protobuf. + * + * \retval KNOT_EOK + * \retval KNOT_ERROR + * \retval KNOT_EOF + * \retval KNOT_ENOMEM + */ +int dt_reader_read(dt_reader_t *reader, Dnstap__Dnstap **d); + +/*! + * \brief free the frame allocated by dt_read_data. + * + * \param reader Dnstap reader context. + * \param d The frame to be freed. + */ +void dt_reader_free_frame(dt_reader_t *reader, Dnstap__Dnstap **d); diff --git a/src/contrib/dnstap/writer.c b/src/contrib/dnstap/writer.c new file mode 100644 index 0000000..03961c1 --- /dev/null +++ b/src/contrib/dnstap/writer.c @@ -0,0 +1,120 @@ +/* Copyright (C) 2014 Farsight Security, Inc. <software@farsightsecurity.com> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "libknot/errcode.h" + +#include "contrib/dnstap/dnstap.h" +#include "contrib/dnstap/writer.h" + +dt_writer_t* dt_writer_create(const char *file_path, const char *version) +{ + struct fstrm_file_options *fopt = NULL; + struct fstrm_writer_options *wopt = NULL; + dt_writer_t *writer = NULL; + fstrm_res res; + + writer = calloc(1, sizeof(dt_writer_t)); + if (writer == NULL) { + goto fail; + } + + // Set "version". + if (version != NULL) { + writer->len_version = strlen(version); + writer->version = strdup(version); + if (!writer->version) { + goto fail; + } + } + + // Open writer. + fopt = fstrm_file_options_init(); + fstrm_file_options_set_file_path(fopt, file_path); + wopt = fstrm_writer_options_init(); + fstrm_writer_options_add_content_type(wopt, + (const uint8_t *) DNSTAP_CONTENT_TYPE, + strlen(DNSTAP_CONTENT_TYPE)); + writer->fw = fstrm_file_writer_init(fopt, wopt); + fstrm_file_options_destroy(&fopt); + fstrm_writer_options_destroy(&wopt); + if (writer->fw == NULL) { + goto fail; + } + + res = fstrm_writer_open(writer->fw); + if (res != fstrm_res_success) { + goto fail; + } + + return writer; +fail: + dt_writer_free(writer); + return NULL; +} + +void dt_writer_free(dt_writer_t *writer) +{ + if (writer == NULL) { + return; + } + + fstrm_writer_destroy(&writer->fw); + free(writer->version); + free(writer); +} + +int dt_writer_write(dt_writer_t *writer, const ProtobufCMessage *msg) +{ + Dnstap__Dnstap dnstap = DNSTAP__DNSTAP__INIT; + size_t len; + uint8_t *data; + + if (writer->fw == NULL) { + return KNOT_EOK; + } + + // Only handle dnstap/Message. + assert(msg->descriptor == &dnstap__message__descriptor); + + // Fill out 'dnstap'. + if (writer->version) { + dnstap.version.data = writer->version; + dnstap.version.len = writer->len_version; + dnstap.has_version = 1; + } + dnstap.type = DNSTAP__DNSTAP__TYPE__MESSAGE; + dnstap.message = (Dnstap__Message *)msg; + + // Serialize the dnstap frame. + if (!dt_pack(&dnstap, &data, &len)) { + return KNOT_ENOMEM; + } + + // Write the dnstap frame to the output stream. + if (fstrm_writer_write(writer->fw, data, len) != fstrm_res_success) { + return KNOT_ERROR; + } + + // Cleanup. + free(data); + + return KNOT_EOK; +} diff --git a/src/contrib/dnstap/writer.h b/src/contrib/dnstap/writer.h new file mode 100644 index 0000000..e6928c5 --- /dev/null +++ b/src/contrib/dnstap/writer.h @@ -0,0 +1,71 @@ +/* Copyright (C) 2014 Farsight Security, Inc. <software@farsightsecurity.com> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/*! + * \author Robert Edmonds <edmonds@fsi.io> + * + * \brief Dnstap file writer. + */ + +#pragma once + +#include <fstrm.h> +#include <protobuf-c/protobuf-c.h> + +/*! \brief Structure for dnstap file writer. */ +typedef struct { + /*!< Output writer. */ + struct fstrm_writer *fw; + + /*!< dnstap "version" field. */ + void *version; + + /*!< length of dnstap "version" field. */ + size_t len_version; +} dt_writer_t; + +/*! + * \brief Creates dnstap file writer structure. + * + * \param file_path Name of file to write output to. + * \param version Version string of software. May be NULL. + * + * \retval writer if success. + * \retval NULL if error. + */ +dt_writer_t* dt_writer_create(const char *file_path, const char *version); + +/*! + * \brief Finish writing dnstap file writer and free resources. + * + * \param writer dnstap file writer structure. + */ +void dt_writer_free(dt_writer_t *writer); + +/*! + * \brief Write a protobuf to the dnstap file writer. + * + * Supported protobuf types for the 'msg' parameter: + * \c Dnstap__Message + * + * \param writer dnstap file writer structure. + * \param msg dnstap protobuf. Must be a supported type. + * + * \retval KNOT_EOK + * \retval KNOT_EINVAL + * \retval KNOT_ENOMEM + */ +int dt_writer_write(dt_writer_t *writer, const ProtobufCMessage *msg); diff --git a/src/contrib/files.c b/src/contrib/files.c new file mode 100644 index 0000000..c753ca8 --- /dev/null +++ b/src/contrib/files.c @@ -0,0 +1,254 @@ +/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <assert.h> +#include <dirent.h> +#include <fcntl.h> +#include <ftw.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "contrib/files.h" +#include "contrib/string.h" +#include "libknot/errcode.h" + +#if defined(MAXBSIZE) + #define BUFSIZE MAXBSIZE +#else + #define BUFSIZE (64 * 1024) +#endif + +char* abs_path(const char *path, const char *base_dir) +{ + if (path == NULL) { + return NULL; + } else if (path[0] == '/') { + return strdup(path); + } else { + char *full_path; + if (base_dir == NULL) { + char *cwd = realpath("./", NULL); + full_path = sprintf_alloc("%s/%s", cwd, path); + free(cwd); + } else { + full_path = sprintf_alloc("%s/%s", base_dir, path); + } + return full_path; + } +} + +bool same_path(const char *path1, const char *path2) +{ + bool equal = false; + int err = 0; + + struct stat sb1; + if (stat(path1, &sb1) == 0) { + struct stat sb2; + if (stat(path2, &sb2) == 0) { + if (sb1.st_dev == sb2.st_dev && + sb1.st_ino == sb2.st_ino) { + equal = true; + } + } else { + err = errno; + } + } else { + err = errno; + } + + if (err != 0) { + // Can't compare real absolute paths, as stat() failed already. Try the best. + char *full_path1 = abs_path(path1, NULL); + char *full_path2 = abs_path(path2, NULL); + + if (strcmp(full_path1, full_path2) == 0) { + equal = true; + } + + free(full_path1); + free(full_path2); + } + + return equal; +} + +static int remove_file(const char *path, const struct stat *stat, int type, struct FTW *ftw) +{ + (void)stat; + (void)ftw; + if (type == FTW_DP) { + return rmdir(path); + } else { + return unlink(path); + } +} + +bool remove_path(const char *path) +{ + return (0 == nftw(path, remove_file, 1, FTW_DEPTH | FTW_PHYS)); +} + +int make_dir(const char *path, mode_t mode, bool ignore_existing) +{ + if (mkdir(path, mode) == 0) { + return KNOT_EOK; + } + + if (!ignore_existing || errno != EEXIST) { + return knot_map_errno(); + } + + assert(errno == EEXIST); + + struct stat st = { 0 }; + if (stat(path, &st) != 0) { + return knot_map_errno(); + } + + if (!S_ISDIR(st.st_mode)) { + return KNOT_EEXIST; + } + + return KNOT_EOK; +} + +int make_path(const char *path, mode_t mode) +{ + if (path == NULL) { + return KNOT_EINVAL; + } + + char *dir = strdup(path); + if (dir == NULL) { + return KNOT_ENOMEM; + } + + for (char *p = strchr(dir + 1, '/'); p != NULL; p = strchr(p + 1, '/')) { + *p = '\0'; + if (mkdir(dir, mode) == -1 && errno != EEXIST) { + free(dir); + return knot_map_errno(); + } + *p = '/'; + } + + free(dir); + + return KNOT_EOK; +} + +int open_tmp_file(const char *path, char **tmp_name, FILE **file, mode_t mode) +{ + int ret; + + *tmp_name = sprintf_alloc("%s.XXXXXX", path); + if (*tmp_name == NULL) { + ret = KNOT_ENOMEM; + goto open_tmp_failed; + } + + int fd = mkstemp(*tmp_name); + if (fd < 0) { + ret = knot_map_errno(); + goto open_tmp_failed; + } + + if (fchmod(fd, mode) != 0) { + ret = knot_map_errno(); + close(fd); + unlink(*tmp_name); + goto open_tmp_failed; + } + + *file = fdopen(fd, "w"); + if (*file == NULL) { + ret = knot_map_errno(); + close(fd); + unlink(*tmp_name); + goto open_tmp_failed; + } + + return KNOT_EOK; +open_tmp_failed: + free(*tmp_name); + *tmp_name = NULL; + *file = NULL; + + assert(ret != KNOT_EOK); + return ret; +} + +int copy_file(const char *dest, const char *src) +{ + if (dest == NULL || src == NULL) { + return KNOT_EINVAL; + } + + int ret = 0; + char *buf = NULL, *tmp_name = NULL; + FILE *file = NULL; + + FILE *from = fopen(src, "r"); + if (from == NULL) { + ret = errno == ENOENT ? KNOT_EFILE : knot_map_errno(); + goto done; + } + + buf = malloc(sizeof(*buf) * BUFSIZE); + if (buf == NULL) { + ret = KNOT_ENOMEM; + goto done; + } + + ret = open_tmp_file(dest, &tmp_name, &file, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP); + if (ret != KNOT_EOK) { + goto done; + } + + ssize_t cnt; + while ((cnt = fread(buf, sizeof(*buf), BUFSIZE, from)) != 0 && + (ret = (fwrite(buf, sizeof(*buf), cnt, file) == cnt))) { + } + + ret = !ret || ferror(from); + if (ret != 0) { + ret = knot_map_errno(); + unlink(tmp_name); + goto done; + } + + ret = rename(tmp_name, dest); + if (ret != 0) { + ret = knot_map_errno(); + unlink(tmp_name); + goto done; + } + ret = KNOT_EOK; + +done: + free(tmp_name); + if (file != NULL) { + fclose(file); + } + free(buf); + if (from != NULL) { + fclose(from); + } + return ret; +} diff --git a/src/contrib/files.h b/src/contrib/files.h new file mode 100644 index 0000000..16a0f44 --- /dev/null +++ b/src/contrib/files.h @@ -0,0 +1,77 @@ +/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#pragma once + +#include <stdbool.h> +#include <stdio.h> +#include <sys/types.h> + +/*! + * Gets the absolute path. + * + * \note The result must be explicitly deallocated. + * + * \param[in] path Absolute path or a relative path suffix; a string. + * \param[in] base_dir Path prefix for a relative string. + * + * \return Absolute path string pointer. + */ +char* abs_path(const char *path, const char *base_dir); + +/*! + * Try to compare two paths whether they are identical. + * + * \note If any of the two paths doesn't physically exist, their identity can't + * be detected in some special corner cases. + * + * \param[in] path1 Absolute or a relative path (a file, a directory, etc.) + * \param[in] path2 Absolute or a relative path (a file, a directory, etc.) + * + * \return True if both paths are identical (if they point to the same inode), + * false otherwise. + */ +bool same_path(const char *path1, const char *path2); + +/*! + * \brief Delete file or directory (recursive). + * + * \return true on success, false when one or more files failed to be removed. + */ +bool remove_path(const char *path); + +/*! + * Equivalent to mkdir(2), can succeed if the directory already exists. + */ +int make_dir(const char *path, mode_t mode, bool ignore_existing); + +/*! + * Makes a directory part of the path with all parent directories if not exist. + */ +int make_path(const char *path, mode_t mode); + +/*! + * Creates and opens for writing a temporary file based on given path. + */ +int open_tmp_file(const char *path, char **tmp_name, FILE **file, mode_t mode); + +/*! + * Copies a file, possibly overwriting existing one, as an atomic operation. + * + * \return KNOT_EOK on success, KNOT_EFILE if the source file doesn't exist, + * \or other KNOT_E* values in case of other errors. + */ +int copy_file(const char *dest, const char *src); diff --git a/src/contrib/getline.c b/src/contrib/getline.c new file mode 100644 index 0000000..46cdff9 --- /dev/null +++ b/src/contrib/getline.c @@ -0,0 +1,60 @@ +/* Copyright (C) 2011 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +// FreeBSD POSIX2008 getline +#ifndef _WITH_GETLINE +#define _WITH_GETLINE +#endif + +#include "contrib/getline.h" + +#include <stdio.h> // getline or fgetln +#include <stdlib.h> // free +#include <string.h> // memcpy + +ssize_t knot_getline(char **lineptr, size_t *n, FILE *stream) +{ +#ifdef HAVE_GETLINE + return getline(lineptr, n, stream); +#else +#ifdef HAVE_FGETLN + size_t length = 0; + char *buffer = fgetln(stream, &length); + if (buffer == NULL) { + return -1; + } + + /* NOTE: Function fgetln doesn't return terminated string! + * Output buffer from the fgetln can't be freed. + */ + + // If the output buffer is not specified or is small, extend it. + if (*lineptr == NULL || *n <= length) { + char *tmp = realloc(*lineptr, length + 1); + if (tmp == NULL) { + return -1; + } + *lineptr = tmp; + *n = length + 1; + } + + memcpy(*lineptr, buffer, length); + (*lineptr)[length] = '\0'; + + return length; +#endif +#endif +} diff --git a/src/contrib/getline.h b/src/contrib/getline.h new file mode 100644 index 0000000..80f2db1 --- /dev/null +++ b/src/contrib/getline.h @@ -0,0 +1,39 @@ +/* Copyright (C) 2018 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/*! + * \brief Multiplatform getline wrapper. + */ + +#pragma once + +#include <stdio.h> +#include <sys/types.h> + +/*! + * \brief Reads a line from a stream. + * + * This function has the same semantics as POSIX.1-2008 getline(). + * If necessary, the output buffer will be allocated/reallocated. + * + * \param lineptr Output buffer. + * \param n Output buffer size. + * \param stream Input stream. + * + * \retval Number of characters read, including new line delimiter, + * not including terminating. -1 on error or EOF. + */ +ssize_t knot_getline(char **lineptr, size_t *n, FILE *stream); diff --git a/src/contrib/json.c b/src/contrib/json.c new file mode 100644 index 0000000..8b8bda4 --- /dev/null +++ b/src/contrib/json.c @@ -0,0 +1,237 @@ +/* Copyright (C) 2022 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + +#include "contrib/json.h" + +#include "contrib/string.h" + +#define MAX_DEPTH 8 + +enum { + BLOCK_INVALID = 0, + BLOCK_OBJECT, + BLOCK_LIST, +}; + +/*! One indented block of JSON. */ +struct block { + /*! Block type. */ + int type; + /*! Number of elements written. */ + int count; +}; + +struct jsonw { + /*! Output file stream. */ + FILE *out; + /*! Indentaiton string. */ + const char *indent; + /*! List to be used as a stack of blocks in progress. */ + struct block stack[MAX_DEPTH]; + /*! Index pointing to the top of the stack. */ + int top; + /*! Newline needed indication. */ + bool wrap; +}; + +static const char *DEFAULT_INDENT = "\t"; + +static void start_block(jsonw_t *w, int type) +{ + assert(w->top > 0); + + struct block b = { + .type = type, + .count = 0, + }; + + w->top -= 1; + w->stack[w->top] = b; +} + +static struct block *cur_block(jsonw_t *w) +{ + if (w && w->top < MAX_DEPTH) { + return &w->stack[w->top]; + } + return NULL; +} + +/*! Insert new line and indent for the next write. */ +static void wrap(jsonw_t *w) +{ + if (!w->wrap) { + w->wrap = true; + return; + } + + fputc('\n', w->out); + int level = MAX_DEPTH - w->top; + for (int i = 0; i < level; i++) { + fprintf(w->out, "%s", w->indent); + } +} + +static void end_block(jsonw_t *w) +{ + assert(w->top < MAX_DEPTH); + + w->top += 1; +} + +static void escaped_print(jsonw_t *w, const char *str) +{ + fputc('"', w->out); + for (const char *pos = str; *pos != '\0'; pos++) { + char c = *pos; + if (c == '\\' || c == '\"') { + fputc('\\', w->out); + } + fputc(c, w->out); + } + fputc('"', w->out); +} + +static void align_key(jsonw_t *w, const char *key) +{ + struct block *top = cur_block(w); + if (top && top->count++) { + fputc(',', w->out); + } + + wrap(w); + + if (key && key[0]) { + escaped_print(w, key); + fprintf(w->out, ": "); + } +} + +jsonw_t *jsonw_new(FILE *out, const char *indent) +{ + assert(out); + + jsonw_t *w = calloc(1, sizeof(*w)); + if (w == NULL) { + return w; + } + + w->out = out; + w->indent = indent ? indent : DEFAULT_INDENT; + w->top = MAX_DEPTH; + + return w; +} + +void jsonw_free(jsonw_t **w) +{ + if (w == NULL) { + return; + } + + wrap(*w); + + free(*w); + *w = NULL; +} + +void jsonw_object(jsonw_t *w, const char *key) +{ + assert(w); + + align_key(w, key); + fprintf(w->out, "{"); + start_block(w, BLOCK_OBJECT); +} + +void jsonw_list(jsonw_t *w, const char *key) +{ + assert(w); + + align_key(w, key); + fprintf(w->out, "["); + start_block(w, BLOCK_LIST); +} + +void jsonw_str(jsonw_t *w, const char *key, const char *value) +{ + assert(w); + + align_key(w, key); + escaped_print(w, value); +} + +void jsonw_ulong(jsonw_t *w, const char *key, unsigned long value) +{ + assert(w); + + align_key(w, key); + fprintf(w->out, "%lu", value); +} + +void jsonw_int(jsonw_t *w, const char *key, int value) +{ + assert(w); + + align_key(w, key); + fprintf(w->out, "%d", value); +} + + +void jsonw_bool(jsonw_t *w, const char *key, bool value) +{ + assert(w); + + align_key(w, key); + fprintf(w->out, "%s", value ? "true" : "false"); +} + +void jsonw_hex(jsonw_t *w, const char *key, const uint8_t *data, size_t len) +{ + assert(w); + + char *hex = bin_to_hex(data, len, true); + if (hex != NULL) { + jsonw_str(w, key, hex); + } + free(hex); +} + +void jsonw_end(jsonw_t *w) +{ + assert(w); + + struct block *top = cur_block(w); + if (top == NULL) { + return; + } + + end_block(w); + wrap(w); + + switch (top->type) { + case BLOCK_OBJECT: + fprintf(w->out, "}"); + break; + case BLOCK_LIST: + fprintf(w->out, "]"); + break; + } +} diff --git a/src/contrib/json.h b/src/contrib/json.h new file mode 100644 index 0000000..983b329 --- /dev/null +++ b/src/contrib/json.h @@ -0,0 +1,87 @@ +/* Copyright (C) 2022 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#pragma once + +#include <stdio.h> +#include <stdbool.h> +#include <stdint.h> + +/*! + * Simple pretty JSON writer. + */ +struct jsonw; +typedef struct jsonw jsonw_t; + +/*! + * Create new JSON writer. + * + * @param out Output file stream. + * @param indent Indentation string. + * + * @return JSON writer or NULL for allocation error. + */ +jsonw_t *jsonw_new(FILE *out, const char *indent); + +/*! + * Free JSON writer created with jsonw_new. + */ +void jsonw_free(jsonw_t **w); + +/*! + * Start writing a new object. + * + * The following writes will represent key and value pairs respectively until + * jsonw_end is called. + */ +void jsonw_object(jsonw_t *w, const char *key); + +/*! + * Start writing a new list. + * + * The following writes will represent values until jsonw_end is called. + */ +void jsonw_list(jsonw_t *w, const char *key); + +/*! + * Write string as JSON. The string will be escaped properly. + */ +void jsonw_str(jsonw_t *w, const char *key, const char *value); + +/*! + * Write unsigned long value as JSON. + */ +void jsonw_ulong(jsonw_t *w, const char *key, unsigned long value); + +/*! + * Write integer as JSON. + */ +void jsonw_int(jsonw_t *w, const char *key, int value); + +/*! + * Write boolean value as JSON. + */ +void jsonw_bool(jsonw_t *w, const char *key, bool value); + +/*! + * Write binary data encoded to HEX as JSON. + */ +void jsonw_hex(jsonw_t *w, const char *key, const uint8_t *data, size_t len); + +/*! + * Terminate in-progress object or list. + */ +void jsonw_end(jsonw_t *w); diff --git a/src/contrib/libbpf/LICENSE b/src/contrib/libbpf/LICENSE new file mode 100644 index 0000000..149c7b0 --- /dev/null +++ b/src/contrib/libbpf/LICENSE @@ -0,0 +1 @@ +../licenses/LGPL-2.1
\ No newline at end of file diff --git a/src/contrib/libbpf/bpf/bpf.c b/src/contrib/libbpf/bpf/bpf.c new file mode 100644 index 0000000..98596e1 --- /dev/null +++ b/src/contrib/libbpf/bpf/bpf.c @@ -0,0 +1,710 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +/* + * common eBPF ELF operations. + * + * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org> + * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> + * Copyright (C) 2015 Huawei Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses> + */ + +#include <stdlib.h> +#include <string.h> +#include <memory.h> +#include <unistd.h> +#include <asm/unistd.h> +#include <errno.h> +#include <linux/bpf.h> +#include "bpf.h" +#include "libbpf.h" +#include "libbpf_internal.h" + +/* + * When building perf, unistd.h is overridden. __NR_bpf is + * required to be defined explicitly. + */ +#ifndef __NR_bpf +# if defined(__i386__) +# define __NR_bpf 357 +# elif defined(__x86_64__) +# define __NR_bpf 321 +# elif defined(__aarch64__) +# define __NR_bpf 280 +# elif defined(__sparc__) +# define __NR_bpf 349 +# elif defined(__s390__) +# define __NR_bpf 351 +# elif defined(__arc__) +# define __NR_bpf 280 +# else +# error __NR_bpf not defined. libbpf does not support your arch. +# endif +#endif + +static inline __u64 ptr_to_u64(const void *ptr) +{ + return (__u64) (unsigned long) ptr; +} + +static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, + unsigned int size) +{ + return syscall(__NR_bpf, cmd, attr, size); +} + +static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size) +{ + int fd; + + do { + fd = sys_bpf(BPF_PROG_LOAD, attr, size); + } while (fd < 0 && errno == EAGAIN); + + return fd; +} + +int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) +{ + union bpf_attr attr; + + memset(&attr, '\0', sizeof(attr)); + + attr.map_type = create_attr->map_type; + attr.key_size = create_attr->key_size; + attr.value_size = create_attr->value_size; + attr.max_entries = create_attr->max_entries; + attr.map_flags = create_attr->map_flags; + if (create_attr->name) + memcpy(attr.map_name, create_attr->name, + min(strlen(create_attr->name), BPF_OBJ_NAME_LEN - 1)); + attr.numa_node = create_attr->numa_node; + attr.btf_fd = create_attr->btf_fd; + attr.btf_key_type_id = create_attr->btf_key_type_id; + attr.btf_value_type_id = create_attr->btf_value_type_id; + attr.map_ifindex = create_attr->map_ifindex; + attr.inner_map_fd = create_attr->inner_map_fd; + + return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); +} + +int bpf_create_map_node(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, int max_entries, + __u32 map_flags, int node) +{ + struct bpf_create_map_attr map_attr = {}; + + map_attr.name = name; + map_attr.map_type = map_type; + map_attr.map_flags = map_flags; + map_attr.key_size = key_size; + map_attr.value_size = value_size; + map_attr.max_entries = max_entries; + if (node >= 0) { + map_attr.numa_node = node; + map_attr.map_flags |= BPF_F_NUMA_NODE; + } + + return bpf_create_map_xattr(&map_attr); +} + +int bpf_create_map(enum bpf_map_type map_type, int key_size, + int value_size, int max_entries, __u32 map_flags) +{ + struct bpf_create_map_attr map_attr = {}; + + map_attr.map_type = map_type; + map_attr.map_flags = map_flags; + map_attr.key_size = key_size; + map_attr.value_size = value_size; + map_attr.max_entries = max_entries; + + return bpf_create_map_xattr(&map_attr); +} + +int bpf_create_map_name(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, int max_entries, + __u32 map_flags) +{ + struct bpf_create_map_attr map_attr = {}; + + map_attr.name = name; + map_attr.map_type = map_type; + map_attr.map_flags = map_flags; + map_attr.key_size = key_size; + map_attr.value_size = value_size; + map_attr.max_entries = max_entries; + + return bpf_create_map_xattr(&map_attr); +} + +int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, + int key_size, int inner_map_fd, int max_entries, + __u32 map_flags, int node) +{ + union bpf_attr attr; + + memset(&attr, '\0', sizeof(attr)); + + attr.map_type = map_type; + attr.key_size = key_size; + attr.value_size = 4; + attr.inner_map_fd = inner_map_fd; + attr.max_entries = max_entries; + attr.map_flags = map_flags; + if (name) + memcpy(attr.map_name, name, + min(strlen(name), BPF_OBJ_NAME_LEN - 1)); + + if (node >= 0) { + attr.map_flags |= BPF_F_NUMA_NODE; + attr.numa_node = node; + } + + return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); +} + +int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, + int key_size, int inner_map_fd, int max_entries, + __u32 map_flags) +{ + return bpf_create_map_in_map_node(map_type, name, key_size, + inner_map_fd, max_entries, map_flags, + -1); +} + +static void * +alloc_zero_tailing_info(const void *orecord, __u32 cnt, + __u32 actual_rec_size, __u32 expected_rec_size) +{ + __u64 info_len = (__u64)actual_rec_size * cnt; + void *info, *nrecord; + int i; + + info = malloc(info_len); + if (!info) + return NULL; + + /* zero out bytes kernel does not understand */ + nrecord = info; + for (i = 0; i < cnt; i++) { + memcpy(nrecord, orecord, expected_rec_size); + memset(nrecord + expected_rec_size, 0, + actual_rec_size - expected_rec_size); + orecord += actual_rec_size; + nrecord += actual_rec_size; + } + + return info; +} + +int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, + char *log_buf, size_t log_buf_sz) +{ + void *finfo = NULL, *linfo = NULL; + union bpf_attr attr; + __u32 log_level; + int fd; + + if (!load_attr || !log_buf != !log_buf_sz) + return -EINVAL; + + log_level = load_attr->log_level; + if (log_level > (4 | 2 | 1) || (log_level && !log_buf)) + return -EINVAL; + + memset(&attr, 0, sizeof(attr)); + attr.prog_type = load_attr->prog_type; + attr.expected_attach_type = load_attr->expected_attach_type; + if (attr.prog_type == BPF_PROG_TYPE_TRACING) { + attr.attach_btf_id = load_attr->attach_btf_id; + attr.attach_prog_fd = load_attr->attach_prog_fd; + } else { + attr.prog_ifindex = load_attr->prog_ifindex; + attr.kern_version = load_attr->kern_version; + } + attr.insn_cnt = (__u32)load_attr->insns_cnt; + attr.insns = ptr_to_u64(load_attr->insns); + attr.license = ptr_to_u64(load_attr->license); + + attr.log_level = log_level; + if (log_level) { + attr.log_buf = ptr_to_u64(log_buf); + attr.log_size = log_buf_sz; + } else { + attr.log_buf = ptr_to_u64(NULL); + attr.log_size = 0; + } + + attr.prog_btf_fd = load_attr->prog_btf_fd; + attr.func_info_rec_size = load_attr->func_info_rec_size; + attr.func_info_cnt = load_attr->func_info_cnt; + attr.func_info = ptr_to_u64(load_attr->func_info); + attr.line_info_rec_size = load_attr->line_info_rec_size; + attr.line_info_cnt = load_attr->line_info_cnt; + attr.line_info = ptr_to_u64(load_attr->line_info); + if (load_attr->name) + memcpy(attr.prog_name, load_attr->name, + min(strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1)); + attr.prog_flags = load_attr->prog_flags; + + fd = sys_bpf_prog_load(&attr, sizeof(attr)); + if (fd >= 0) + return fd; + + /* After bpf_prog_load, the kernel may modify certain attributes + * to give user space a hint how to deal with loading failure. + * Check to see whether we can make some changes and load again. + */ + while (errno == E2BIG && (!finfo || !linfo)) { + if (!finfo && attr.func_info_cnt && + attr.func_info_rec_size < load_attr->func_info_rec_size) { + /* try with corrected func info records */ + finfo = alloc_zero_tailing_info(load_attr->func_info, + load_attr->func_info_cnt, + load_attr->func_info_rec_size, + attr.func_info_rec_size); + if (!finfo) + goto done; + + attr.func_info = ptr_to_u64(finfo); + attr.func_info_rec_size = load_attr->func_info_rec_size; + } else if (!linfo && attr.line_info_cnt && + attr.line_info_rec_size < + load_attr->line_info_rec_size) { + linfo = alloc_zero_tailing_info(load_attr->line_info, + load_attr->line_info_cnt, + load_attr->line_info_rec_size, + attr.line_info_rec_size); + if (!linfo) + goto done; + + attr.line_info = ptr_to_u64(linfo); + attr.line_info_rec_size = load_attr->line_info_rec_size; + } else { + break; + } + + fd = sys_bpf_prog_load(&attr, sizeof(attr)); + + if (fd >= 0) + goto done; + } + + if (log_level || !log_buf) + goto done; + + /* Try again with log */ + attr.log_buf = ptr_to_u64(log_buf); + attr.log_size = log_buf_sz; + attr.log_level = 1; + log_buf[0] = 0; + fd = sys_bpf_prog_load(&attr, sizeof(attr)); +done: + free(finfo); + free(linfo); + return fd; +} + +int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, + size_t insns_cnt, const char *license, + __u32 kern_version, char *log_buf, + size_t log_buf_sz) +{ + struct bpf_load_program_attr load_attr; + + memset(&load_attr, 0, sizeof(struct bpf_load_program_attr)); + load_attr.prog_type = type; + load_attr.expected_attach_type = 0; + load_attr.name = NULL; + load_attr.insns = insns; + load_attr.insns_cnt = insns_cnt; + load_attr.license = license; + load_attr.kern_version = kern_version; + + return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz); +} + +int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, + size_t insns_cnt, __u32 prog_flags, const char *license, + __u32 kern_version, char *log_buf, size_t log_buf_sz, + int log_level) +{ + union bpf_attr attr; + + memset(&attr, 0, sizeof(attr)); + attr.prog_type = type; + attr.insn_cnt = (__u32)insns_cnt; + attr.insns = ptr_to_u64(insns); + attr.license = ptr_to_u64(license); + attr.log_buf = ptr_to_u64(log_buf); + attr.log_size = log_buf_sz; + attr.log_level = log_level; + log_buf[0] = 0; + attr.kern_version = kern_version; + attr.prog_flags = prog_flags; + + return sys_bpf_prog_load(&attr, sizeof(attr)); +} + +int bpf_map_update_elem(int fd, const void *key, const void *value, + __u64 flags) +{ + union bpf_attr attr; + + memset(&attr, 0, sizeof(attr)); + attr.map_fd = fd; + attr.key = ptr_to_u64(key); + attr.value = ptr_to_u64(value); + attr.flags = flags; + + return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)); +} + +int bpf_map_lookup_elem(int fd, const void *key, void *value) +{ + union bpf_attr attr; + + memset(&attr, 0, sizeof(attr)); + attr.map_fd = fd; + attr.key = ptr_to_u64(key); + attr.value = ptr_to_u64(value); + + return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); +} + +int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags) +{ + union bpf_attr attr; + + memset(&attr, 0, sizeof(attr)); + attr.map_fd = fd; + attr.key = ptr_to_u64(key); + attr.value = ptr_to_u64(value); + attr.flags = flags; + + return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); +} + +int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value) +{ + union bpf_attr attr; + + memset(&attr, 0, sizeof(attr)); + attr.map_fd = fd; + attr.key = ptr_to_u64(key); + attr.value = ptr_to_u64(value); + + return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr)); +} + +int bpf_map_delete_elem(int fd, const void *key) +{ + union bpf_attr attr; + + memset(&attr, 0, sizeof(attr)); + attr.map_fd = fd; + attr.key = ptr_to_u64(key); + + return sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr)); +} + +int bpf_map_get_next_key(int fd, const void *key, void *next_key) +{ + union bpf_attr attr; + + memset(&attr, 0, sizeof(attr)); + attr.map_fd = fd; + attr.key = ptr_to_u64(key); + attr.next_key = ptr_to_u64(next_key); + + return sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr)); +} + +int bpf_map_freeze(int fd) +{ + union bpf_attr attr; + + memset(&attr, 0, sizeof(attr)); + attr.map_fd = fd; + + return sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr)); +} + +int bpf_obj_pin(int fd, const char *pathname) +{ + union bpf_attr attr; + + memset(&attr, 0, sizeof(attr)); + attr.pathname = ptr_to_u64((void *)pathname); + attr.bpf_fd = fd; + + return sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr)); +} + +int bpf_obj_get(const char *pathname) +{ + union bpf_attr attr; + + memset(&attr, 0, sizeof(attr)); + attr.pathname = ptr_to_u64((void *)pathname); + + return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr)); +} + +int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type, + unsigned int flags) +{ + union bpf_attr attr; + + memset(&attr, 0, sizeof(attr)); + attr.target_fd = target_fd; + attr.attach_bpf_fd = prog_fd; + attr.attach_type = type; + attr.attach_flags = flags; + + return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)); +} + +int bpf_prog_detach(int target_fd, enum bpf_attach_type type) +{ + union bpf_attr attr; + + memset(&attr, 0, sizeof(attr)); + attr.target_fd = target_fd; + attr.attach_type = type; + + return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); +} + +int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type) +{ + union bpf_attr attr; + + memset(&attr, 0, sizeof(attr)); + attr.target_fd = target_fd; + attr.attach_bpf_fd = prog_fd; + attr.attach_type = type; + + return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); +} + +int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, + __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt) +{ + union bpf_attr attr; + int ret; + + memset(&attr, 0, sizeof(attr)); + attr.query.target_fd = target_fd; + attr.query.attach_type = type; + attr.query.query_flags = query_flags; + attr.query.prog_cnt = *prog_cnt; + attr.query.prog_ids = ptr_to_u64(prog_ids); + + ret = sys_bpf(BPF_PROG_QUERY, &attr, sizeof(attr)); + if (attach_flags) + *attach_flags = attr.query.attach_flags; + *prog_cnt = attr.query.prog_cnt; + return ret; +} + +int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, + void *data_out, __u32 *size_out, __u32 *retval, + __u32 *duration) +{ + union bpf_attr attr; + int ret; + + memset(&attr, 0, sizeof(attr)); + attr.test.prog_fd = prog_fd; + attr.test.data_in = ptr_to_u64(data); + attr.test.data_out = ptr_to_u64(data_out); + attr.test.data_size_in = size; + attr.test.repeat = repeat; + + ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr)); + if (size_out) + *size_out = attr.test.data_size_out; + if (retval) + *retval = attr.test.retval; + if (duration) + *duration = attr.test.duration; + return ret; +} + +int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr) +{ + union bpf_attr attr; + int ret; + + if (!test_attr->data_out && test_attr->data_size_out > 0) + return -EINVAL; + + memset(&attr, 0, sizeof(attr)); + attr.test.prog_fd = test_attr->prog_fd; + attr.test.data_in = ptr_to_u64(test_attr->data_in); + attr.test.data_out = ptr_to_u64(test_attr->data_out); + attr.test.data_size_in = test_attr->data_size_in; + attr.test.data_size_out = test_attr->data_size_out; + attr.test.ctx_in = ptr_to_u64(test_attr->ctx_in); + attr.test.ctx_out = ptr_to_u64(test_attr->ctx_out); + attr.test.ctx_size_in = test_attr->ctx_size_in; + attr.test.ctx_size_out = test_attr->ctx_size_out; + attr.test.repeat = test_attr->repeat; + + ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr)); + test_attr->data_size_out = attr.test.data_size_out; + test_attr->ctx_size_out = attr.test.ctx_size_out; + test_attr->retval = attr.test.retval; + test_attr->duration = attr.test.duration; + return ret; +} + +static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd) +{ + union bpf_attr attr; + int err; + + memset(&attr, 0, sizeof(attr)); + attr.start_id = start_id; + + err = sys_bpf(cmd, &attr, sizeof(attr)); + if (!err) + *next_id = attr.next_id; + + return err; +} + +int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id) +{ + return bpf_obj_get_next_id(start_id, next_id, BPF_PROG_GET_NEXT_ID); +} + +int bpf_map_get_next_id(__u32 start_id, __u32 *next_id) +{ + return bpf_obj_get_next_id(start_id, next_id, BPF_MAP_GET_NEXT_ID); +} + +int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id) +{ + return bpf_obj_get_next_id(start_id, next_id, BPF_BTF_GET_NEXT_ID); +} + +int bpf_prog_get_fd_by_id(__u32 id) +{ + union bpf_attr attr; + + memset(&attr, 0, sizeof(attr)); + attr.prog_id = id; + + return sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr)); +} + +int bpf_map_get_fd_by_id(__u32 id) +{ + union bpf_attr attr; + + memset(&attr, 0, sizeof(attr)); + attr.map_id = id; + + return sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr)); +} + +int bpf_btf_get_fd_by_id(__u32 id) +{ + union bpf_attr attr; + + memset(&attr, 0, sizeof(attr)); + attr.btf_id = id; + + return sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr)); +} + +int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len) +{ + union bpf_attr attr; + int err; + + memset(&attr, 0, sizeof(attr)); + attr.info.bpf_fd = prog_fd; + attr.info.info_len = *info_len; + attr.info.info = ptr_to_u64(info); + + err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)); + if (!err) + *info_len = attr.info.info_len; + + return err; +} + +int bpf_raw_tracepoint_open(const char *name, int prog_fd) +{ + union bpf_attr attr; + + memset(&attr, 0, sizeof(attr)); + attr.raw_tracepoint.name = ptr_to_u64(name); + attr.raw_tracepoint.prog_fd = prog_fd; + + return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr)); +} + +int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, + bool do_log) +{ + union bpf_attr attr = {}; + int fd; + + attr.btf = ptr_to_u64(btf); + attr.btf_size = btf_size; + +retry: + if (do_log && log_buf && log_buf_size) { + attr.btf_log_level = 1; + attr.btf_log_size = log_buf_size; + attr.btf_log_buf = ptr_to_u64(log_buf); + } + + fd = sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr)); + if (fd == -1 && !do_log && log_buf && log_buf_size) { + do_log = true; + goto retry; + } + + return fd; +} + +int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, + __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset, + __u64 *probe_addr) +{ + union bpf_attr attr = {}; + int err; + + attr.task_fd_query.pid = pid; + attr.task_fd_query.fd = fd; + attr.task_fd_query.flags = flags; + attr.task_fd_query.buf = ptr_to_u64(buf); + attr.task_fd_query.buf_len = *buf_len; + + err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr)); + *buf_len = attr.task_fd_query.buf_len; + *prog_id = attr.task_fd_query.prog_id; + *fd_type = attr.task_fd_query.fd_type; + *probe_offset = attr.task_fd_query.probe_offset; + *probe_addr = attr.task_fd_query.probe_addr; + + return err; +} diff --git a/src/contrib/libbpf/bpf/bpf.h b/src/contrib/libbpf/bpf/bpf.h new file mode 100644 index 0000000..3c791fa --- /dev/null +++ b/src/contrib/libbpf/bpf/bpf.h @@ -0,0 +1,184 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +/* + * common eBPF ELF operations. + * + * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org> + * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> + * Copyright (C) 2015 Huawei Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses> + */ +#ifndef __LIBBPF_BPF_H +#define __LIBBPF_BPF_H + +#include <linux/bpf.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef LIBBPF_API +#define LIBBPF_API __attribute__((visibility("default"))) +#endif + +struct bpf_create_map_attr { + const char *name; + enum bpf_map_type map_type; + __u32 map_flags; + __u32 key_size; + __u32 value_size; + __u32 max_entries; + __u32 numa_node; + __u32 btf_fd; + __u32 btf_key_type_id; + __u32 btf_value_type_id; + __u32 map_ifindex; + __u32 inner_map_fd; +}; + +LIBBPF_API int +bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr); +LIBBPF_API int bpf_create_map_node(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, + int max_entries, __u32 map_flags, int node); +LIBBPF_API int bpf_create_map_name(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, + int max_entries, __u32 map_flags); +LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size, + int value_size, int max_entries, __u32 map_flags); +LIBBPF_API int bpf_create_map_in_map_node(enum bpf_map_type map_type, + const char *name, int key_size, + int inner_map_fd, int max_entries, + __u32 map_flags, int node); +LIBBPF_API int bpf_create_map_in_map(enum bpf_map_type map_type, + const char *name, int key_size, + int inner_map_fd, int max_entries, + __u32 map_flags); + +struct bpf_load_program_attr { + enum bpf_prog_type prog_type; + enum bpf_attach_type expected_attach_type; + const char *name; + const struct bpf_insn *insns; + size_t insns_cnt; + const char *license; + union { + __u32 kern_version; + __u32 attach_prog_fd; + }; + union { + __u32 prog_ifindex; + __u32 attach_btf_id; + }; + __u32 prog_btf_fd; + __u32 func_info_rec_size; + const void *func_info; + __u32 func_info_cnt; + __u32 line_info_rec_size; + const void *line_info; + __u32 line_info_cnt; + __u32 log_level; + __u32 prog_flags; +}; + +/* Flags to direct loading requirements */ +#define MAPS_RELAX_COMPAT 0x01 + +/* Recommend log buffer size */ +#define BPF_LOG_BUF_SIZE (UINT32_MAX >> 8) /* verifier maximum in kernels <= 5.1 */ +LIBBPF_API int +bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, + char *log_buf, size_t log_buf_sz); +LIBBPF_API int bpf_load_program(enum bpf_prog_type type, + const struct bpf_insn *insns, size_t insns_cnt, + const char *license, __u32 kern_version, + char *log_buf, size_t log_buf_sz); +LIBBPF_API int bpf_verify_program(enum bpf_prog_type type, + const struct bpf_insn *insns, + size_t insns_cnt, __u32 prog_flags, + const char *license, __u32 kern_version, + char *log_buf, size_t log_buf_sz, + int log_level); + +LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value, + __u64 flags); + +LIBBPF_API int bpf_map_lookup_elem(int fd, const void *key, void *value); +LIBBPF_API int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, + __u64 flags); +LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key, + void *value); +LIBBPF_API int bpf_map_delete_elem(int fd, const void *key); +LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key); +LIBBPF_API int bpf_map_freeze(int fd); +LIBBPF_API int bpf_obj_pin(int fd, const char *pathname); +LIBBPF_API int bpf_obj_get(const char *pathname); +LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd, + enum bpf_attach_type type, unsigned int flags); +LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); +LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd, + enum bpf_attach_type type); + +struct bpf_prog_test_run_attr { + int prog_fd; + int repeat; + const void *data_in; + __u32 data_size_in; + void *data_out; /* optional */ + __u32 data_size_out; /* in: max length of data_out + * out: length of data_out */ + __u32 retval; /* out: return code of the BPF program */ + __u32 duration; /* out: average per repetition in ns */ + const void *ctx_in; /* optional */ + __u32 ctx_size_in; + void *ctx_out; /* optional */ + __u32 ctx_size_out; /* in: max length of ctx_out + * out: length of cxt_out */ +}; + +LIBBPF_API int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr); + +/* + * bpf_prog_test_run does not check that data_out is large enough. Consider + * using bpf_prog_test_run_xattr instead. + */ +LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data, + __u32 size, void *data_out, __u32 *size_out, + __u32 *retval, __u32 *duration); +LIBBPF_API int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id); +LIBBPF_API int bpf_map_get_next_id(__u32 start_id, __u32 *next_id); +LIBBPF_API int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id); +LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id); +LIBBPF_API int bpf_map_get_fd_by_id(__u32 id); +LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id); +LIBBPF_API int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len); +LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type, + __u32 query_flags, __u32 *attach_flags, + __u32 *prog_ids, __u32 *prog_cnt); +LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd); +LIBBPF_API int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, + __u32 log_buf_size, bool do_log); +LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, + __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, + __u64 *probe_offset, __u64 *probe_addr); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __LIBBPF_BPF_H */ diff --git a/src/contrib/libbpf/bpf/bpf_core_read.h b/src/contrib/libbpf/bpf/bpf_core_read.h new file mode 100644 index 0000000..7009dc9 --- /dev/null +++ b/src/contrib/libbpf/bpf/bpf_core_read.h @@ -0,0 +1,263 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __BPF_CORE_READ_H__ +#define __BPF_CORE_READ_H__ + +/* + * enum bpf_field_info_kind is passed as a second argument into + * __builtin_preserve_field_info() built-in to get a specific aspect of + * a field, captured as a first argument. __builtin_preserve_field_info(field, + * info_kind) returns __u32 integer and produces BTF field relocation, which + * is understood and processed by libbpf during BPF object loading. See + * selftests/bpf for examples. + */ +enum bpf_field_info_kind { + BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */ + BPF_FIELD_BYTE_SIZE = 1, + BPF_FIELD_EXISTS = 2, /* field existence in target kernel */ + BPF_FIELD_SIGNED = 3, + BPF_FIELD_LSHIFT_U64 = 4, + BPF_FIELD_RSHIFT_U64 = 5, +}; + +#define __CORE_RELO(src, field, info) \ + __builtin_preserve_field_info((src)->field, BPF_FIELD_##info) + +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \ + bpf_probe_read((void *)dst, \ + __CORE_RELO(src, fld, BYTE_SIZE), \ + (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET)) +#else +/* semantics of LSHIFT_64 assumes loading values into low-ordered bytes, so + * for big-endian we need to adjust destination pointer accordingly, based on + * field byte size + */ +#define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \ + bpf_probe_read((void *)dst + (8 - __CORE_RELO(src, fld, BYTE_SIZE)), \ + __CORE_RELO(src, fld, BYTE_SIZE), \ + (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET)) +#endif + +/* + * Extract bitfield, identified by s->field, and return its value as u64. + * All this is done in relocatable manner, so bitfield changes such as + * signedness, bit size, offset changes, this will be handled automatically. + * This version of macro is using bpf_probe_read() to read underlying integer + * storage. Macro functions as an expression and its return type is + * bpf_probe_read()'s return value: 0, on success, <0 on error. + */ +#define BPF_CORE_READ_BITFIELD_PROBED(s, field) ({ \ + unsigned long long val = 0; \ + \ + __CORE_BITFIELD_PROBE_READ(&val, s, field); \ + val <<= __CORE_RELO(s, field, LSHIFT_U64); \ + if (__CORE_RELO(s, field, SIGNED)) \ + val = ((long long)val) >> __CORE_RELO(s, field, RSHIFT_U64); \ + else \ + val = val >> __CORE_RELO(s, field, RSHIFT_U64); \ + val; \ +}) + +/* + * Extract bitfield, identified by s->field, and return its value as u64. + * This version of macro is using direct memory reads and should be used from + * BPF program types that support such functionality (e.g., typed raw + * tracepoints). + */ +#define BPF_CORE_READ_BITFIELD(s, field) ({ \ + const void *p = (const void *)s + __CORE_RELO(s, field, BYTE_OFFSET); \ + unsigned long long val; \ + \ + switch (__CORE_RELO(s, field, BYTE_SIZE)) { \ + case 1: val = *(const unsigned char *)p; \ + case 2: val = *(const unsigned short *)p; \ + case 4: val = *(const unsigned int *)p; \ + case 8: val = *(const unsigned long long *)p; \ + } \ + val <<= __CORE_RELO(s, field, LSHIFT_U64); \ + if (__CORE_RELO(s, field, SIGNED)) \ + val = ((long long)val) >> __CORE_RELO(s, field, RSHIFT_U64); \ + else \ + val = val >> __CORE_RELO(s, field, RSHIFT_U64); \ + val; \ +}) + +/* + * Convenience macro to check that field actually exists in target kernel's. + * Returns: + * 1, if matching field is present in target kernel; + * 0, if no matching field found. + */ +#define bpf_core_field_exists(field) \ + __builtin_preserve_field_info(field, BPF_FIELD_EXISTS) + +/* + * Convenience macro to get byte size of a field. Works for integers, + * struct/unions, pointers, arrays, and enums. + */ +#define bpf_core_field_size(field) \ + __builtin_preserve_field_info(field, BPF_FIELD_BYTE_SIZE) + +/* + * bpf_core_read() abstracts away bpf_probe_read() call and captures offset + * relocation for source address using __builtin_preserve_access_index() + * built-in, provided by Clang. + * + * __builtin_preserve_access_index() takes as an argument an expression of + * taking an address of a field within struct/union. It makes compiler emit + * a relocation, which records BTF type ID describing root struct/union and an + * accessor string which describes exact embedded field that was used to take + * an address. See detailed description of this relocation format and + * semantics in comments to struct bpf_field_reloc in libbpf_internal.h. + * + * This relocation allows libbpf to adjust BPF instruction to use correct + * actual field offset, based on target kernel BTF type that matches original + * (local) BTF, used to record relocation. + */ +#define bpf_core_read(dst, sz, src) \ + bpf_probe_read(dst, sz, \ + (const void *)__builtin_preserve_access_index(src)) + +/* + * bpf_core_read_str() is a thin wrapper around bpf_probe_read_str() + * additionally emitting BPF CO-RE field relocation for specified source + * argument. + */ +#define bpf_core_read_str(dst, sz, src) \ + bpf_probe_read_str(dst, sz, \ + (const void *)__builtin_preserve_access_index(src)) + +#define ___concat(a, b) a ## b +#define ___apply(fn, n) ___concat(fn, n) +#define ___nth(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, __11, N, ...) N + +/* + * return number of provided arguments; used for switch-based variadic macro + * definitions (see ___last, ___arrow, etc below) + */ +#define ___narg(...) ___nth(_, ##__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) +/* + * return 0 if no arguments are passed, N - otherwise; used for + * recursively-defined macros to specify termination (0) case, and generic + * (N) case (e.g., ___read_ptrs, ___core_read) + */ +#define ___empty(...) ___nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0) + +#define ___last1(x) x +#define ___last2(a, x) x +#define ___last3(a, b, x) x +#define ___last4(a, b, c, x) x +#define ___last5(a, b, c, d, x) x +#define ___last6(a, b, c, d, e, x) x +#define ___last7(a, b, c, d, e, f, x) x +#define ___last8(a, b, c, d, e, f, g, x) x +#define ___last9(a, b, c, d, e, f, g, h, x) x +#define ___last10(a, b, c, d, e, f, g, h, i, x) x +#define ___last(...) ___apply(___last, ___narg(__VA_ARGS__))(__VA_ARGS__) + +#define ___nolast2(a, _) a +#define ___nolast3(a, b, _) a, b +#define ___nolast4(a, b, c, _) a, b, c +#define ___nolast5(a, b, c, d, _) a, b, c, d +#define ___nolast6(a, b, c, d, e, _) a, b, c, d, e +#define ___nolast7(a, b, c, d, e, f, _) a, b, c, d, e, f +#define ___nolast8(a, b, c, d, e, f, g, _) a, b, c, d, e, f, g +#define ___nolast9(a, b, c, d, e, f, g, h, _) a, b, c, d, e, f, g, h +#define ___nolast10(a, b, c, d, e, f, g, h, i, _) a, b, c, d, e, f, g, h, i +#define ___nolast(...) ___apply(___nolast, ___narg(__VA_ARGS__))(__VA_ARGS__) + +#define ___arrow1(a) a +#define ___arrow2(a, b) a->b +#define ___arrow3(a, b, c) a->b->c +#define ___arrow4(a, b, c, d) a->b->c->d +#define ___arrow5(a, b, c, d, e) a->b->c->d->e +#define ___arrow6(a, b, c, d, e, f) a->b->c->d->e->f +#define ___arrow7(a, b, c, d, e, f, g) a->b->c->d->e->f->g +#define ___arrow8(a, b, c, d, e, f, g, h) a->b->c->d->e->f->g->h +#define ___arrow9(a, b, c, d, e, f, g, h, i) a->b->c->d->e->f->g->h->i +#define ___arrow10(a, b, c, d, e, f, g, h, i, j) a->b->c->d->e->f->g->h->i->j +#define ___arrow(...) ___apply(___arrow, ___narg(__VA_ARGS__))(__VA_ARGS__) + +#define ___type(...) typeof(___arrow(__VA_ARGS__)) + +#define ___read(read_fn, dst, src_type, src, accessor) \ + read_fn((void *)(dst), sizeof(*(dst)), &((src_type)(src))->accessor) + +/* "recursively" read a sequence of inner pointers using local __t var */ +#define ___rd_first(src, a) ___read(bpf_core_read, &__t, ___type(src), src, a); +#define ___rd_last(...) \ + ___read(bpf_core_read, &__t, \ + ___type(___nolast(__VA_ARGS__)), __t, ___last(__VA_ARGS__)); +#define ___rd_p1(...) const void *__t; ___rd_first(__VA_ARGS__) +#define ___rd_p2(...) ___rd_p1(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) +#define ___rd_p3(...) ___rd_p2(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) +#define ___rd_p4(...) ___rd_p3(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) +#define ___rd_p5(...) ___rd_p4(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) +#define ___rd_p6(...) ___rd_p5(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) +#define ___rd_p7(...) ___rd_p6(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) +#define ___rd_p8(...) ___rd_p7(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) +#define ___rd_p9(...) ___rd_p8(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) +#define ___read_ptrs(src, ...) \ + ___apply(___rd_p, ___narg(__VA_ARGS__))(src, __VA_ARGS__) + +#define ___core_read0(fn, dst, src, a) \ + ___read(fn, dst, ___type(src), src, a); +#define ___core_readN(fn, dst, src, ...) \ + ___read_ptrs(src, ___nolast(__VA_ARGS__)) \ + ___read(fn, dst, ___type(src, ___nolast(__VA_ARGS__)), __t, \ + ___last(__VA_ARGS__)); +#define ___core_read(fn, dst, src, a, ...) \ + ___apply(___core_read, ___empty(__VA_ARGS__))(fn, dst, \ + src, a, ##__VA_ARGS__) + +/* + * BPF_CORE_READ_INTO() is a more performance-conscious variant of + * BPF_CORE_READ(), in which final field is read into user-provided storage. + * See BPF_CORE_READ() below for more details on general usage. + */ +#define BPF_CORE_READ_INTO(dst, src, a, ...) \ + ({ \ + ___core_read(bpf_core_read, dst, src, a, ##__VA_ARGS__) \ + }) + +/* + * BPF_CORE_READ_STR_INTO() does same "pointer chasing" as + * BPF_CORE_READ() for intermediate pointers, but then executes (and returns + * corresponding error code) bpf_core_read_str() for final string read. + */ +#define BPF_CORE_READ_STR_INTO(dst, src, a, ...) \ + ({ \ + ___core_read(bpf_core_read_str, dst, src, a, ##__VA_ARGS__) \ + }) + +/* + * BPF_CORE_READ() is used to simplify BPF CO-RE relocatable read, especially + * when there are few pointer chasing steps. + * E.g., what in non-BPF world (or in BPF w/ BCC) would be something like: + * int x = s->a.b.c->d.e->f->g; + * can be succinctly achieved using BPF_CORE_READ as: + * int x = BPF_CORE_READ(s, a.b.c, d.e, f, g); + * + * BPF_CORE_READ will decompose above statement into 4 bpf_core_read (BPF + * CO-RE relocatable bpf_probe_read() wrapper) calls, logically equivalent to: + * 1. const void *__t = s->a.b.c; + * 2. __t = __t->d.e; + * 3. __t = __t->f; + * 4. return __t->g; + * + * Equivalence is logical, because there is a heavy type casting/preservation + * involved, as well as all the reads are happening through bpf_probe_read() + * calls using __builtin_preserve_access_index() to emit CO-RE relocations. + * + * N.B. Only up to 9 "field accessors" are supported, which should be more + * than enough for any practical purpose. + */ +#define BPF_CORE_READ(src, a, ...) \ + ({ \ + ___type(src, a, ##__VA_ARGS__) __r; \ + BPF_CORE_READ_INTO(&__r, src, a, ##__VA_ARGS__); \ + __r; \ + }) + +#endif + diff --git a/src/contrib/libbpf/bpf/bpf_endian.h b/src/contrib/libbpf/bpf/bpf_endian.h new file mode 100644 index 0000000..fbe2800 --- /dev/null +++ b/src/contrib/libbpf/bpf/bpf_endian.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __BPF_ENDIAN__ +#define __BPF_ENDIAN__ + +#include <linux/stddef.h> +#include <linux/swab.h> + +/* LLVM's BPF target selects the endianness of the CPU + * it compiles on, or the user specifies (bpfel/bpfeb), + * respectively. The used __BYTE_ORDER__ is defined by + * the compiler, we cannot rely on __BYTE_ORDER from + * libc headers, since it doesn't reflect the actual + * requested byte order. + * + * Note, LLVM's BPF target has different __builtin_bswapX() + * semantics. It does map to BPF_ALU | BPF_END | BPF_TO_BE + * in bpfel and bpfeb case, which means below, that we map + * to cpu_to_be16(). We could use it unconditionally in BPF + * case, but better not rely on it, so that this header here + * can be used from application and BPF program side, which + * use different targets. + */ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# define __bpf_ntohs(x) __builtin_bswap16(x) +# define __bpf_htons(x) __builtin_bswap16(x) +# define __bpf_constant_ntohs(x) ___constant_swab16(x) +# define __bpf_constant_htons(x) ___constant_swab16(x) +# define __bpf_ntohl(x) __builtin_bswap32(x) +# define __bpf_htonl(x) __builtin_bswap32(x) +# define __bpf_constant_ntohl(x) ___constant_swab32(x) +# define __bpf_constant_htonl(x) ___constant_swab32(x) +# define __bpf_be64_to_cpu(x) __builtin_bswap64(x) +# define __bpf_cpu_to_be64(x) __builtin_bswap64(x) +# define __bpf_constant_be64_to_cpu(x) ___constant_swab64(x) +# define __bpf_constant_cpu_to_be64(x) ___constant_swab64(x) +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +# define __bpf_ntohs(x) (x) +# define __bpf_htons(x) (x) +# define __bpf_constant_ntohs(x) (x) +# define __bpf_constant_htons(x) (x) +# define __bpf_ntohl(x) (x) +# define __bpf_htonl(x) (x) +# define __bpf_constant_ntohl(x) (x) +# define __bpf_constant_htonl(x) (x) +# define __bpf_be64_to_cpu(x) (x) +# define __bpf_cpu_to_be64(x) (x) +# define __bpf_constant_be64_to_cpu(x) (x) +# define __bpf_constant_cpu_to_be64(x) (x) +#else +# error "Fix your compiler's __BYTE_ORDER__?!" +#endif + +#define bpf_htons(x) \ + (__builtin_constant_p(x) ? \ + __bpf_constant_htons(x) : __bpf_htons(x)) +#define bpf_ntohs(x) \ + (__builtin_constant_p(x) ? \ + __bpf_constant_ntohs(x) : __bpf_ntohs(x)) +#define bpf_htonl(x) \ + (__builtin_constant_p(x) ? \ + __bpf_constant_htonl(x) : __bpf_htonl(x)) +#define bpf_ntohl(x) \ + (__builtin_constant_p(x) ? \ + __bpf_constant_ntohl(x) : __bpf_ntohl(x)) +#define bpf_cpu_to_be64(x) \ + (__builtin_constant_p(x) ? \ + __bpf_constant_cpu_to_be64(x) : __bpf_cpu_to_be64(x)) +#define bpf_be64_to_cpu(x) \ + (__builtin_constant_p(x) ? \ + __bpf_constant_be64_to_cpu(x) : __bpf_be64_to_cpu(x)) + +#endif /* __BPF_ENDIAN__ */ diff --git a/src/contrib/libbpf/bpf/bpf_helper_defs.h b/src/contrib/libbpf/bpf/bpf_helper_defs.h new file mode 100644 index 0000000..1f357f6 --- /dev/null +++ b/src/contrib/libbpf/bpf/bpf_helper_defs.h @@ -0,0 +1,2759 @@ +/* This is auto-generated file. See bpf_helpers_doc.py for details. */ + +/* Forward declarations of BPF structs */ +struct bpf_fib_lookup; +struct bpf_perf_event_data; +struct bpf_perf_event_value; +struct bpf_sock; +struct bpf_sock_addr; +struct bpf_sock_ops; +struct bpf_sock_tuple; +struct bpf_spin_lock; +struct bpf_sysctl; +struct bpf_tcp_sock; +struct bpf_tunnel_key; +struct bpf_xfrm_state; +struct pt_regs; +struct sk_reuseport_md; +struct sockaddr; +struct tcphdr; +struct __sk_buff; +struct sk_msg_md; +struct xdp_md; + +/* + * bpf_map_lookup_elem + * + * Perform a lookup in *map* for an entry associated to *key*. + * + * Returns + * Map value associated to *key*, or **NULL** if no entry was + * found. + */ +static void *(*bpf_map_lookup_elem)(void *map, const void *key) = (void *) 1; + +/* + * bpf_map_update_elem + * + * Add or update the value of the entry associated to *key* in + * *map* with *value*. *flags* is one of: + * + * **BPF_NOEXIST** + * The entry for *key* must not exist in the map. + * **BPF_EXIST** + * The entry for *key* must already exist in the map. + * **BPF_ANY** + * No condition on the existence of the entry for *key*. + * + * Flag value **BPF_NOEXIST** cannot be used for maps of types + * **BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY** (all + * elements always exist), the helper would return an error. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_map_update_elem)(void *map, const void *key, const void *value, __u64 flags) = (void *) 2; + +/* + * bpf_map_delete_elem + * + * Delete entry with *key* from *map*. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_map_delete_elem)(void *map, const void *key) = (void *) 3; + +/* + * bpf_probe_read + * + * For tracing programs, safely attempt to read *size* bytes from + * kernel space address *unsafe_ptr* and store the data in *dst*. + * + * Generally, use bpf_probe_read_user() or bpf_probe_read_kernel() + * instead. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_probe_read)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 4; + +/* + * bpf_ktime_get_ns + * + * Return the time elapsed since system boot, in nanoseconds. + * + * Returns + * Current *ktime*. + */ +static __u64 (*bpf_ktime_get_ns)(void) = (void *) 5; + +/* + * bpf_trace_printk + * + * This helper is a "printk()-like" facility for debugging. It + * prints a message defined by format *fmt* (of size *fmt_size*) + * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if + * available. It can take up to three additional **u64** + * arguments (as an eBPF helpers, the total number of arguments is + * limited to five). + * + * Each time the helper is called, it appends a line to the trace. + * Lines are discarded while *\/sys/kernel/debug/tracing/trace* is + * open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this. + * The format of the trace is customizable, and the exact output + * one will get depends on the options set in + * *\/sys/kernel/debug/tracing/trace_options* (see also the + * *README* file under the same directory). However, it usually + * defaults to something like: + * + * :: + * + * telnet-470 [001] .N.. 419421.045894: 0x00000001: <formatted msg> + * + * In the above: + * + * * ``telnet`` is the name of the current task. + * * ``470`` is the PID of the current task. + * * ``001`` is the CPU number on which the task is + * running. + * * In ``.N..``, each character refers to a set of + * options (whether irqs are enabled, scheduling + * options, whether hard/softirqs are running, level of + * preempt_disabled respectively). **N** means that + * **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED** + * are set. + * * ``419421.045894`` is a timestamp. + * * ``0x00000001`` is a fake value used by BPF for the + * instruction pointer register. + * * ``<formatted msg>`` is the message formatted with + * *fmt*. + * + * The conversion specifiers supported by *fmt* are similar, but + * more limited than for printk(). They are **%d**, **%i**, + * **%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**, + * **%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size + * of field, padding with zeroes, etc.) is available, and the + * helper will return **-EINVAL** (but print nothing) if it + * encounters an unknown specifier. + * + * Also, note that **bpf_trace_printk**\ () is slow, and should + * only be used for debugging purposes. For this reason, a notice + * bloc (spanning several lines) is printed to kernel logs and + * states that the helper should not be used "for production use" + * the first time this helper is used (or more precisely, when + * **trace_printk**\ () buffers are allocated). For passing values + * to user space, perf events should be preferred. + * + * Returns + * The number of bytes written to the buffer, or a negative error + * in case of failure. + */ +static int (*bpf_trace_printk)(const char *fmt, __u32 fmt_size, ...) = (void *) 6; + +/* + * bpf_get_prandom_u32 + * + * Get a pseudo-random number. + * + * From a security point of view, this helper uses its own + * pseudo-random internal state, and cannot be used to infer the + * seed of other random functions in the kernel. However, it is + * essential to note that the generator used by the helper is not + * cryptographically secure. + * + * Returns + * A random 32-bit unsigned value. + */ +static __u32 (*bpf_get_prandom_u32)(void) = (void *) 7; + +/* + * bpf_get_smp_processor_id + * + * Get the SMP (symmetric multiprocessing) processor id. Note that + * all programs run with preemption disabled, which means that the + * SMP processor id is stable during all the execution of the + * program. + * + * Returns + * The SMP id of the processor running the program. + */ +static __u32 (*bpf_get_smp_processor_id)(void) = (void *) 8; + +/* + * bpf_skb_store_bytes + * + * Store *len* bytes from address *from* into the packet + * associated to *skb*, at *offset*. *flags* are a combination of + * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the + * checksum for the packet after storing the bytes) and + * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\ + * **->swhash** and *skb*\ **->l4hash** to 0). + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_skb_store_bytes)(struct __sk_buff *skb, __u32 offset, const void *from, __u32 len, __u64 flags) = (void *) 9; + +/* + * bpf_l3_csum_replace + * + * Recompute the layer 3 (e.g. IP) checksum for the packet + * associated to *skb*. Computation is incremental, so the helper + * must know the former value of the header field that was + * modified (*from*), the new value of this field (*to*), and the + * number of bytes (2 or 4) for this field, stored in *size*. + * Alternatively, it is possible to store the difference between + * the previous and the new values of the header field in *to*, by + * setting *from* and *size* to 0. For both methods, *offset* + * indicates the location of the IP checksum within the packet. + * + * This helper works in combination with **bpf_csum_diff**\ (), + * which does not update the checksum in-place, but offers more + * flexibility and can handle sizes larger than 2 or 4 for the + * checksum to update. + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_l3_csum_replace)(struct __sk_buff *skb, __u32 offset, __u64 from, __u64 to, __u64 size) = (void *) 10; + +/* + * bpf_l4_csum_replace + * + * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the + * packet associated to *skb*. Computation is incremental, so the + * helper must know the former value of the header field that was + * modified (*from*), the new value of this field (*to*), and the + * number of bytes (2 or 4) for this field, stored on the lowest + * four bits of *flags*. Alternatively, it is possible to store + * the difference between the previous and the new values of the + * header field in *to*, by setting *from* and the four lowest + * bits of *flags* to 0. For both methods, *offset* indicates the + * location of the IP checksum within the packet. In addition to + * the size of the field, *flags* can be added (bitwise OR) actual + * flags. With **BPF_F_MARK_MANGLED_0**, a null checksum is left + * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and + * for updates resulting in a null checksum the value is set to + * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates + * the checksum is to be computed against a pseudo-header. + * + * This helper works in combination with **bpf_csum_diff**\ (), + * which does not update the checksum in-place, but offers more + * flexibility and can handle sizes larger than 2 or 4 for the + * checksum to update. + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_l4_csum_replace)(struct __sk_buff *skb, __u32 offset, __u64 from, __u64 to, __u64 flags) = (void *) 11; + +/* + * bpf_tail_call + * + * This special helper is used to trigger a "tail call", or in + * other words, to jump into another eBPF program. The same stack + * frame is used (but values on stack and in registers for the + * caller are not accessible to the callee). This mechanism allows + * for program chaining, either for raising the maximum number of + * available eBPF instructions, or to execute given programs in + * conditional blocks. For security reasons, there is an upper + * limit to the number of successive tail calls that can be + * performed. + * + * Upon call of this helper, the program attempts to jump into a + * program referenced at index *index* in *prog_array_map*, a + * special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes + * *ctx*, a pointer to the context. + * + * If the call succeeds, the kernel immediately runs the first + * instruction of the new program. This is not a function call, + * and it never returns to the previous program. If the call + * fails, then the helper has no effect, and the caller continues + * to run its subsequent instructions. A call can fail if the + * destination program for the jump does not exist (i.e. *index* + * is superior to the number of entries in *prog_array_map*), or + * if the maximum number of tail calls has been reached for this + * chain of programs. This limit is defined in the kernel by the + * macro **MAX_TAIL_CALL_CNT** (not accessible to user space), + * which is currently set to 32. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_tail_call)(void *ctx, void *prog_array_map, __u32 index) = (void *) 12; + +/* + * bpf_clone_redirect + * + * Clone and redirect the packet associated to *skb* to another + * net device of index *ifindex*. Both ingress and egress + * interfaces can be used for redirection. The **BPF_F_INGRESS** + * value in *flags* is used to make the distinction (ingress path + * is selected if the flag is present, egress path otherwise). + * This is the only flag supported for now. + * + * In comparison with **bpf_redirect**\ () helper, + * **bpf_clone_redirect**\ () has the associated cost of + * duplicating the packet buffer, but this can be executed out of + * the eBPF program. Conversely, **bpf_redirect**\ () is more + * efficient, but it is handled through an action code where the + * redirection happens only after the eBPF program has returned. + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_clone_redirect)(struct __sk_buff *skb, __u32 ifindex, __u64 flags) = (void *) 13; + +/* + * bpf_get_current_pid_tgid + * + * + * Returns + * A 64-bit integer containing the current tgid and pid, and + * created as such: + * *current_task*\ **->tgid << 32 \|** + * *current_task*\ **->pid**. + */ +static __u64 (*bpf_get_current_pid_tgid)(void) = (void *) 14; + +/* + * bpf_get_current_uid_gid + * + * + * Returns + * A 64-bit integer containing the current GID and UID, and + * created as such: *current_gid* **<< 32 \|** *current_uid*. + */ +static __u64 (*bpf_get_current_uid_gid)(void) = (void *) 15; + +/* + * bpf_get_current_comm + * + * Copy the **comm** attribute of the current task into *buf* of + * *size_of_buf*. The **comm** attribute contains the name of + * the executable (excluding the path) for the current task. The + * *size_of_buf* must be strictly positive. On success, the + * helper makes sure that the *buf* is NUL-terminated. On failure, + * it is filled with zeroes. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_get_current_comm)(void *buf, __u32 size_of_buf) = (void *) 16; + +/* + * bpf_get_cgroup_classid + * + * Retrieve the classid for the current task, i.e. for the net_cls + * cgroup to which *skb* belongs. + * + * This helper can be used on TC egress path, but not on ingress. + * + * The net_cls cgroup provides an interface to tag network packets + * based on a user-provided identifier for all traffic coming from + * the tasks belonging to the related cgroup. See also the related + * kernel documentation, available from the Linux sources in file + * *Documentation/admin-guide/cgroup-v1/net_cls.rst*. + * + * The Linux kernel has two versions for cgroups: there are + * cgroups v1 and cgroups v2. Both are available to users, who can + * use a mixture of them, but note that the net_cls cgroup is for + * cgroup v1 only. This makes it incompatible with BPF programs + * run on cgroups, which is a cgroup-v2-only feature (a socket can + * only hold data for one version of cgroups at a time). + * + * This helper is only available is the kernel was compiled with + * the **CONFIG_CGROUP_NET_CLASSID** configuration option set to + * "**y**" or to "**m**". + * + * Returns + * The classid, or 0 for the default unconfigured classid. + */ +static __u32 (*bpf_get_cgroup_classid)(struct __sk_buff *skb) = (void *) 17; + +/* + * bpf_skb_vlan_push + * + * Push a *vlan_tci* (VLAN tag control information) of protocol + * *vlan_proto* to the packet associated to *skb*, then update + * the checksum. Note that if *vlan_proto* is different from + * **ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to + * be **ETH_P_8021Q**. + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_skb_vlan_push)(struct __sk_buff *skb, __be16 vlan_proto, __u16 vlan_tci) = (void *) 18; + +/* + * bpf_skb_vlan_pop + * + * Pop a VLAN header from the packet associated to *skb*. + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_skb_vlan_pop)(struct __sk_buff *skb) = (void *) 19; + +/* + * bpf_skb_get_tunnel_key + * + * Get tunnel metadata. This helper takes a pointer *key* to an + * empty **struct bpf_tunnel_key** of **size**, that will be + * filled with tunnel metadata for the packet associated to *skb*. + * The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which + * indicates that the tunnel is based on IPv6 protocol instead of + * IPv4. + * + * The **struct bpf_tunnel_key** is an object that generalizes the + * principal parameters used by various tunneling protocols into a + * single struct. This way, it can be used to easily make a + * decision based on the contents of the encapsulation header, + * "summarized" in this struct. In particular, it holds the IP + * address of the remote end (IPv4 or IPv6, depending on the case) + * in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also, + * this struct exposes the *key*\ **->tunnel_id**, which is + * generally mapped to a VNI (Virtual Network Identifier), making + * it programmable together with the **bpf_skb_set_tunnel_key**\ + * () helper. + * + * Let's imagine that the following code is part of a program + * attached to the TC ingress interface, on one end of a GRE + * tunnel, and is supposed to filter out all messages coming from + * remote ends with IPv4 address other than 10.0.0.1: + * + * :: + * + * int ret; + * struct bpf_tunnel_key key = {}; + * + * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); + * if (ret < 0) + * return TC_ACT_SHOT; // drop packet + * + * if (key.remote_ipv4 != 0x0a000001) + * return TC_ACT_SHOT; // drop packet + * + * return TC_ACT_OK; // accept packet + * + * This interface can also be used with all encapsulation devices + * that can operate in "collect metadata" mode: instead of having + * one network device per specific configuration, the "collect + * metadata" mode only requires a single device where the + * configuration can be extracted from this helper. + * + * This can be used together with various tunnels such as VXLan, + * Geneve, GRE or IP in IP (IPIP). + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_skb_get_tunnel_key)(struct __sk_buff *skb, struct bpf_tunnel_key *key, __u32 size, __u64 flags) = (void *) 20; + +/* + * bpf_skb_set_tunnel_key + * + * Populate tunnel metadata for packet associated to *skb.* The + * tunnel metadata is set to the contents of *key*, of *size*. The + * *flags* can be set to a combination of the following values: + * + * **BPF_F_TUNINFO_IPV6** + * Indicate that the tunnel is based on IPv6 protocol + * instead of IPv4. + * **BPF_F_ZERO_CSUM_TX** + * For IPv4 packets, add a flag to tunnel metadata + * indicating that checksum computation should be skipped + * and checksum set to zeroes. + * **BPF_F_DONT_FRAGMENT** + * Add a flag to tunnel metadata indicating that the + * packet should not be fragmented. + * **BPF_F_SEQ_NUMBER** + * Add a flag to tunnel metadata indicating that a + * sequence number should be added to tunnel header before + * sending the packet. This flag was added for GRE + * encapsulation, but might be used with other protocols + * as well in the future. + * + * Here is a typical usage on the transmit path: + * + * :: + * + * struct bpf_tunnel_key key; + * populate key ... + * bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0); + * bpf_clone_redirect(skb, vxlan_dev_ifindex, 0); + * + * See also the description of the **bpf_skb_get_tunnel_key**\ () + * helper for additional information. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_skb_set_tunnel_key)(struct __sk_buff *skb, struct bpf_tunnel_key *key, __u32 size, __u64 flags) = (void *) 21; + +/* + * bpf_perf_event_read + * + * Read the value of a perf event counter. This helper relies on a + * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of + * the perf event counter is selected when *map* is updated with + * perf event file descriptors. The *map* is an array whose size + * is the number of available CPUs, and each cell contains a value + * relative to one CPU. The value to retrieve is indicated by + * *flags*, that contains the index of the CPU to look up, masked + * with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to + * **BPF_F_CURRENT_CPU** to indicate that the value for the + * current CPU should be retrieved. + * + * Note that before Linux 4.13, only hardware perf event can be + * retrieved. + * + * Also, be aware that the newer helper + * **bpf_perf_event_read_value**\ () is recommended over + * **bpf_perf_event_read**\ () in general. The latter has some ABI + * quirks where error and counter value are used as a return code + * (which is wrong to do since ranges may overlap). This issue is + * fixed with **bpf_perf_event_read_value**\ (), which at the same + * time provides more features over the **bpf_perf_event_read**\ + * () interface. Please refer to the description of + * **bpf_perf_event_read_value**\ () for details. + * + * Returns + * The value of the perf event counter read from the map, or a + * negative error code in case of failure. + */ +static __u64 (*bpf_perf_event_read)(void *map, __u64 flags) = (void *) 22; + +/* + * bpf_redirect + * + * Redirect the packet to another net device of index *ifindex*. + * This helper is somewhat similar to **bpf_clone_redirect**\ + * (), except that the packet is not cloned, which provides + * increased performance. + * + * Except for XDP, both ingress and egress interfaces can be used + * for redirection. The **BPF_F_INGRESS** value in *flags* is used + * to make the distinction (ingress path is selected if the flag + * is present, egress path otherwise). Currently, XDP only + * supports redirection to the egress interface, and accepts no + * flag at all. + * + * The same effect can be attained with the more generic + * **bpf_redirect_map**\ (), which requires specific maps to be + * used but offers better performance. + * + * Returns + * For XDP, the helper returns **XDP_REDIRECT** on success or + * **XDP_ABORTED** on error. For other program types, the values + * are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on + * error. + */ +static int (*bpf_redirect)(__u32 ifindex, __u64 flags) = (void *) 23; + +/* + * bpf_get_route_realm + * + * Retrieve the realm or the route, that is to say the + * **tclassid** field of the destination for the *skb*. The + * indentifier retrieved is a user-provided tag, similar to the + * one used with the net_cls cgroup (see description for + * **bpf_get_cgroup_classid**\ () helper), but here this tag is + * held by a route (a destination entry), not by a task. + * + * Retrieving this identifier works with the clsact TC egress hook + * (see also **tc-bpf(8)**), or alternatively on conventional + * classful egress qdiscs, but not on TC ingress path. In case of + * clsact TC egress hook, this has the advantage that, internally, + * the destination entry has not been dropped yet in the transmit + * path. Therefore, the destination entry does not need to be + * artificially held via **netif_keep_dst**\ () for a classful + * qdisc until the *skb* is freed. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_IP_ROUTE_CLASSID** configuration option. + * + * Returns + * The realm of the route for the packet associated to *skb*, or 0 + * if none was found. + */ +static __u32 (*bpf_get_route_realm)(struct __sk_buff *skb) = (void *) 24; + +/* + * bpf_perf_event_output + * + * Write raw *data* blob into a special BPF perf event held by + * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf + * event must have the following attributes: **PERF_SAMPLE_RAW** + * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and + * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. + * + * The *flags* are used to indicate the index in *map* for which + * the value must be put, masked with **BPF_F_INDEX_MASK**. + * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** + * to indicate that the index of the current CPU core should be + * used. + * + * The value to write, of *size*, is passed through eBPF stack and + * pointed by *data*. + * + * The context of the program *ctx* needs also be passed to the + * helper. + * + * On user space, a program willing to read the values needs to + * call **perf_event_open**\ () on the perf event (either for + * one or for all CPUs) and to store the file descriptor into the + * *map*. This must be done before the eBPF program can send data + * into it. An example is available in file + * *samples/bpf/trace_output_user.c* in the Linux kernel source + * tree (the eBPF program counterpart is in + * *samples/bpf/trace_output_kern.c*). + * + * **bpf_perf_event_output**\ () achieves better performance + * than **bpf_trace_printk**\ () for sharing data with user + * space, and is much better suitable for streaming data from eBPF + * programs. + * + * Note that this helper is not restricted to tracing use cases + * and can be used with programs attached to TC or XDP as well, + * where it allows for passing data to user space listeners. Data + * can be: + * + * * Only custom structs, + * * Only the packet payload, or + * * A combination of both. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_perf_event_output)(void *ctx, void *map, __u64 flags, void *data, __u64 size) = (void *) 25; + +/* + * bpf_skb_load_bytes + * + * This helper was provided as an easy way to load data from a + * packet. It can be used to load *len* bytes from *offset* from + * the packet associated to *skb*, into the buffer pointed by + * *to*. + * + * Since Linux 4.7, usage of this helper has mostly been replaced + * by "direct packet access", enabling packet data to be + * manipulated with *skb*\ **->data** and *skb*\ **->data_end** + * pointing respectively to the first byte of packet data and to + * the byte after the last byte of packet data. However, it + * remains useful if one wishes to read large quantities of data + * at once from a packet into the eBPF stack. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_skb_load_bytes)(const void *skb, __u32 offset, void *to, __u32 len) = (void *) 26; + +/* + * bpf_get_stackid + * + * Walk a user or a kernel stack and return its id. To achieve + * this, the helper needs *ctx*, which is a pointer to the context + * on which the tracing program is executed, and a pointer to a + * *map* of type **BPF_MAP_TYPE_STACK_TRACE**. + * + * The last argument, *flags*, holds the number of stack frames to + * skip (from 0 to 255), masked with + * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set + * a combination of the following flags: + * + * **BPF_F_USER_STACK** + * Collect a user space stack instead of a kernel stack. + * **BPF_F_FAST_STACK_CMP** + * Compare stacks by hash only. + * **BPF_F_REUSE_STACKID** + * If two different stacks hash into the same *stackid*, + * discard the old one. + * + * The stack id retrieved is a 32 bit long integer handle which + * can be further combined with other data (including other stack + * ids) and used as a key into maps. This can be useful for + * generating a variety of graphs (such as flame graphs or off-cpu + * graphs). + * + * For walking a stack, this helper is an improvement over + * **bpf_probe_read**\ (), which can be used with unrolled loops + * but is not efficient and consumes a lot of eBPF instructions. + * Instead, **bpf_get_stackid**\ () can collect up to + * **PERF_MAX_STACK_DEPTH** both kernel and user frames. Note that + * this limit can be controlled with the **sysctl** program, and + * that it should be manually increased in order to profile long + * user stacks (such as stacks for Java programs). To do so, use: + * + * :: + * + * # sysctl kernel.perf_event_max_stack=<new value> + * + * Returns + * The positive or null stack id on success, or a negative error + * in case of failure. + */ +static int (*bpf_get_stackid)(void *ctx, void *map, __u64 flags) = (void *) 27; + +/* + * bpf_csum_diff + * + * Compute a checksum difference, from the raw buffer pointed by + * *from*, of length *from_size* (that must be a multiple of 4), + * towards the raw buffer pointed by *to*, of size *to_size* + * (same remark). An optional *seed* can be added to the value + * (this can be cascaded, the seed may come from a previous call + * to the helper). + * + * This is flexible enough to be used in several ways: + * + * * With *from_size* == 0, *to_size* > 0 and *seed* set to + * checksum, it can be used when pushing new data. + * * With *from_size* > 0, *to_size* == 0 and *seed* set to + * checksum, it can be used when removing data from a packet. + * * With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it + * can be used to compute a diff. Note that *from_size* and + * *to_size* do not need to be equal. + * + * This helper can be used in combination with + * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to + * which one can feed in the difference computed with + * **bpf_csum_diff**\ (). + * + * Returns + * The checksum result, or a negative error code in case of + * failure. + */ +static __s64 (*bpf_csum_diff)(__be32 *from, __u32 from_size, __be32 *to, __u32 to_size, __wsum seed) = (void *) 28; + +/* + * bpf_skb_get_tunnel_opt + * + * Retrieve tunnel options metadata for the packet associated to + * *skb*, and store the raw tunnel option data to the buffer *opt* + * of *size*. + * + * This helper can be used with encapsulation devices that can + * operate in "collect metadata" mode (please refer to the related + * note in the description of **bpf_skb_get_tunnel_key**\ () for + * more details). A particular example where this can be used is + * in combination with the Geneve encapsulation protocol, where it + * allows for pushing (with **bpf_skb_get_tunnel_opt**\ () helper) + * and retrieving arbitrary TLVs (Type-Length-Value headers) from + * the eBPF program. This allows for full customization of these + * headers. + * + * Returns + * The size of the option data retrieved. + */ +static int (*bpf_skb_get_tunnel_opt)(struct __sk_buff *skb, void *opt, __u32 size) = (void *) 29; + +/* + * bpf_skb_set_tunnel_opt + * + * Set tunnel options metadata for the packet associated to *skb* + * to the option data contained in the raw buffer *opt* of *size*. + * + * See also the description of the **bpf_skb_get_tunnel_opt**\ () + * helper for additional information. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_skb_set_tunnel_opt)(struct __sk_buff *skb, void *opt, __u32 size) = (void *) 30; + +/* + * bpf_skb_change_proto + * + * Change the protocol of the *skb* to *proto*. Currently + * supported are transition from IPv4 to IPv6, and from IPv6 to + * IPv4. The helper takes care of the groundwork for the + * transition, including resizing the socket buffer. The eBPF + * program is expected to fill the new headers, if any, via + * **skb_store_bytes**\ () and to recompute the checksums with + * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ + * (). The main case for this helper is to perform NAT64 + * operations out of an eBPF program. + * + * Internally, the GSO type is marked as dodgy so that headers are + * checked and segments are recalculated by the GSO/GRO engine. + * The size for GSO target is adapted as well. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_skb_change_proto)(struct __sk_buff *skb, __be16 proto, __u64 flags) = (void *) 31; + +/* + * bpf_skb_change_type + * + * Change the packet type for the packet associated to *skb*. This + * comes down to setting *skb*\ **->pkt_type** to *type*, except + * the eBPF program does not have a write access to *skb*\ + * **->pkt_type** beside this helper. Using a helper here allows + * for graceful handling of errors. + * + * The major use case is to change incoming *skb*s to + * **PACKET_HOST** in a programmatic way instead of having to + * recirculate via **redirect**\ (..., **BPF_F_INGRESS**), for + * example. + * + * Note that *type* only allows certain values. At this time, they + * are: + * + * **PACKET_HOST** + * Packet is for us. + * **PACKET_BROADCAST** + * Send packet to all. + * **PACKET_MULTICAST** + * Send packet to group. + * **PACKET_OTHERHOST** + * Send packet to someone else. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_skb_change_type)(struct __sk_buff *skb, __u32 type) = (void *) 32; + +/* + * bpf_skb_under_cgroup + * + * Check whether *skb* is a descendant of the cgroup2 held by + * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. + * + * Returns + * The return value depends on the result of the test, and can be: + * + * * 0, if the *skb* failed the cgroup2 descendant test. + * * 1, if the *skb* succeeded the cgroup2 descendant test. + * * A negative error code, if an error occurred. + */ +static int (*bpf_skb_under_cgroup)(struct __sk_buff *skb, void *map, __u32 index) = (void *) 33; + +/* + * bpf_get_hash_recalc + * + * Retrieve the hash of the packet, *skb*\ **->hash**. If it is + * not set, in particular if the hash was cleared due to mangling, + * recompute this hash. Later accesses to the hash can be done + * directly with *skb*\ **->hash**. + * + * Calling **bpf_set_hash_invalid**\ (), changing a packet + * prototype with **bpf_skb_change_proto**\ (), or calling + * **bpf_skb_store_bytes**\ () with the + * **BPF_F_INVALIDATE_HASH** are actions susceptible to clear + * the hash and to trigger a new computation for the next call to + * **bpf_get_hash_recalc**\ (). + * + * Returns + * The 32-bit hash. + */ +static __u32 (*bpf_get_hash_recalc)(struct __sk_buff *skb) = (void *) 34; + +/* + * bpf_get_current_task + * + * + * Returns + * A pointer to the current task struct. + */ +static __u64 (*bpf_get_current_task)(void) = (void *) 35; + +/* + * bpf_probe_write_user + * + * Attempt in a safe way to write *len* bytes from the buffer + * *src* to *dst* in memory. It only works for threads that are in + * user context, and *dst* must be a valid user space address. + * + * This helper should not be used to implement any kind of + * security mechanism because of TOC-TOU attacks, but rather to + * debug, divert, and manipulate execution of semi-cooperative + * processes. + * + * Keep in mind that this feature is meant for experiments, and it + * has a risk of crashing the system and running programs. + * Therefore, when an eBPF program using this helper is attached, + * a warning including PID and process name is printed to kernel + * logs. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_probe_write_user)(void *dst, const void *src, __u32 len) = (void *) 36; + +/* + * bpf_current_task_under_cgroup + * + * Check whether the probe is being run is the context of a given + * subset of the cgroup2 hierarchy. The cgroup2 to test is held by + * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. + * + * Returns + * The return value depends on the result of the test, and can be: + * + * * 0, if the *skb* task belongs to the cgroup2. + * * 1, if the *skb* task does not belong to the cgroup2. + * * A negative error code, if an error occurred. + */ +static int (*bpf_current_task_under_cgroup)(void *map, __u32 index) = (void *) 37; + +/* + * bpf_skb_change_tail + * + * Resize (trim or grow) the packet associated to *skb* to the + * new *len*. The *flags* are reserved for future usage, and must + * be left at zero. + * + * The basic idea is that the helper performs the needed work to + * change the size of the packet, then the eBPF program rewrites + * the rest via helpers like **bpf_skb_store_bytes**\ (), + * **bpf_l3_csum_replace**\ (), **bpf_l3_csum_replace**\ () + * and others. This helper is a slow path utility intended for + * replies with control messages. And because it is targeted for + * slow path, the helper itself can afford to be slow: it + * implicitly linearizes, unclones and drops offloads from the + * *skb*. + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_skb_change_tail)(struct __sk_buff *skb, __u32 len, __u64 flags) = (void *) 38; + +/* + * bpf_skb_pull_data + * + * Pull in non-linear data in case the *skb* is non-linear and not + * all of *len* are part of the linear section. Make *len* bytes + * from *skb* readable and writable. If a zero value is passed for + * *len*, then the whole length of the *skb* is pulled. + * + * This helper is only needed for reading and writing with direct + * packet access. + * + * For direct packet access, testing that offsets to access + * are within packet boundaries (test on *skb*\ **->data_end**) is + * susceptible to fail if offsets are invalid, or if the requested + * data is in non-linear parts of the *skb*. On failure the + * program can just bail out, or in the case of a non-linear + * buffer, use a helper to make the data available. The + * **bpf_skb_load_bytes**\ () helper is a first solution to access + * the data. Another one consists in using **bpf_skb_pull_data** + * to pull in once the non-linear parts, then retesting and + * eventually access the data. + * + * At the same time, this also makes sure the *skb* is uncloned, + * which is a necessary condition for direct write. As this needs + * to be an invariant for the write part only, the verifier + * detects writes and adds a prologue that is calling + * **bpf_skb_pull_data()** to effectively unclone the *skb* from + * the very beginning in case it is indeed cloned. + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_skb_pull_data)(struct __sk_buff *skb, __u32 len) = (void *) 39; + +/* + * bpf_csum_update + * + * Add the checksum *csum* into *skb*\ **->csum** in case the + * driver has supplied a checksum for the entire packet into that + * field. Return an error otherwise. This helper is intended to be + * used in combination with **bpf_csum_diff**\ (), in particular + * when the checksum needs to be updated after data has been + * written into the packet through direct packet access. + * + * Returns + * The checksum on success, or a negative error code in case of + * failure. + */ +static __s64 (*bpf_csum_update)(struct __sk_buff *skb, __wsum csum) = (void *) 40; + +/* + * bpf_set_hash_invalid + * + * Invalidate the current *skb*\ **->hash**. It can be used after + * mangling on headers through direct packet access, in order to + * indicate that the hash is outdated and to trigger a + * recalculation the next time the kernel tries to access this + * hash or when the **bpf_get_hash_recalc**\ () helper is called. + * + */ +static void (*bpf_set_hash_invalid)(struct __sk_buff *skb) = (void *) 41; + +/* + * bpf_get_numa_node_id + * + * Return the id of the current NUMA node. The primary use case + * for this helper is the selection of sockets for the local NUMA + * node, when the program is attached to sockets using the + * **SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**), + * but the helper is also available to other eBPF program types, + * similarly to **bpf_get_smp_processor_id**\ (). + * + * Returns + * The id of current NUMA node. + */ +static int (*bpf_get_numa_node_id)(void) = (void *) 42; + +/* + * bpf_skb_change_head + * + * Grows headroom of packet associated to *skb* and adjusts the + * offset of the MAC header accordingly, adding *len* bytes of + * space. It automatically extends and reallocates memory as + * required. + * + * This helper can be used on a layer 3 *skb* to push a MAC header + * for redirection into a layer 2 device. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_skb_change_head)(struct __sk_buff *skb, __u32 len, __u64 flags) = (void *) 43; + +/* + * bpf_xdp_adjust_head + * + * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that + * it is possible to use a negative value for *delta*. This helper + * can be used to prepare the packet for pushing or popping + * headers. + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_xdp_adjust_head)(struct xdp_md *xdp_md, int delta) = (void *) 44; + +/* + * bpf_probe_read_str + * + * Copy a NUL terminated string from an unsafe kernel address + * *unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for + * more details. + * + * Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str() + * instead. + * + * Returns + * On success, the strictly positive length of the string, + * including the trailing NUL character. On error, a negative + * value. + */ +static int (*bpf_probe_read_str)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 45; + +/* + * bpf_get_socket_cookie + * + * If the **struct sk_buff** pointed by *skb* has a known socket, + * retrieve the cookie (generated by the kernel) of this socket. + * If no cookie has been set yet, generate a new cookie. Once + * generated, the socket cookie remains stable for the life of the + * socket. This helper can be useful for monitoring per socket + * networking traffic statistics as it provides a global socket + * identifier that can be assumed unique. + * + * Returns + * A 8-byte long non-decreasing number on success, or 0 if the + * socket field is missing inside *skb*. + */ +static __u64 (*bpf_get_socket_cookie)(void *ctx) = (void *) 46; + +/* + * bpf_get_socket_uid + * + * + * Returns + * The owner UID of the socket associated to *skb*. If the socket + * is **NULL**, or if it is not a full socket (i.e. if it is a + * time-wait or a request socket instead), **overflowuid** value + * is returned (note that **overflowuid** might also be the actual + * UID value for the socket). + */ +static __u32 (*bpf_get_socket_uid)(struct __sk_buff *skb) = (void *) 47; + +/* + * bpf_set_hash + * + * Set the full hash for *skb* (set the field *skb*\ **->hash**) + * to value *hash*. + * + * Returns + * 0 + */ +static __u32 (*bpf_set_hash)(struct __sk_buff *skb, __u32 hash) = (void *) 48; + +/* + * bpf_setsockopt + * + * Emulate a call to **setsockopt()** on the socket associated to + * *bpf_socket*, which must be a full socket. The *level* at + * which the option resides and the name *optname* of the option + * must be specified, see **setsockopt(2)** for more information. + * The option value of length *optlen* is pointed by *optval*. + * + * This helper actually implements a subset of **setsockopt()**. + * It supports the following *level*\ s: + * + * * **SOL_SOCKET**, which supports the following *optname*\ s: + * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**, + * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**. + * * **IPPROTO_TCP**, which supports the following *optname*\ s: + * **TCP_CONGESTION**, **TCP_BPF_IW**, + * **TCP_BPF_SNDCWND_CLAMP**. + * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. + * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_setsockopt)(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen) = (void *) 49; + +/* + * bpf_skb_adjust_room + * + * Grow or shrink the room for data in the packet associated to + * *skb* by *len_diff*, and according to the selected *mode*. + * + * There are two supported modes at this time: + * + * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer + * (room space is added or removed below the layer 2 header). + * + * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer + * (room space is added or removed below the layer 3 header). + * + * The following flags are supported at this time: + * + * * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size. + * Adjusting mss in this way is not allowed for datagrams. + * + * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4**, + * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6**: + * Any new space is reserved to hold a tunnel header. + * Configure skb offsets and other fields accordingly. + * + * * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE**, + * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP**: + * Use with ENCAP_L3 flags to further specify the tunnel type. + * + * * **BPF_F_ADJ_ROOM_ENCAP_L2**\ (*len*): + * Use with ENCAP_L3/L4 flags to further specify the tunnel + * type; *len* is the length of the inner MAC header. + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_skb_adjust_room)(struct __sk_buff *skb, __s32 len_diff, __u32 mode, __u64 flags) = (void *) 50; + +/* + * bpf_redirect_map + * + * Redirect the packet to the endpoint referenced by *map* at + * index *key*. Depending on its type, this *map* can contain + * references to net devices (for forwarding packets through other + * ports), or to CPUs (for redirecting XDP frames to another CPU; + * but this is only implemented for native XDP (with driver + * support) as of this writing). + * + * The lower two bits of *flags* are used as the return code if + * the map lookup fails. This is so that the return value can be + * one of the XDP program return codes up to XDP_TX, as chosen by + * the caller. Any higher bits in the *flags* argument must be + * unset. + * + * When used to redirect packets to net devices, this helper + * provides a high performance increase over **bpf_redirect**\ (). + * This is due to various implementation details of the underlying + * mechanisms, one of which is the fact that **bpf_redirect_map**\ + * () tries to send packet as a "bulk" to the device. + * + * Returns + * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error. + */ +static int (*bpf_redirect_map)(void *map, __u32 key, __u64 flags) = (void *) 51; + +/* + * bpf_sk_redirect_map + * + * Redirect the packet to the socket referenced by *map* (of type + * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and + * egress interfaces can be used for redirection. The + * **BPF_F_INGRESS** value in *flags* is used to make the + * distinction (ingress path is selected if the flag is present, + * egress path otherwise). This is the only flag supported for now. + * + * Returns + * **SK_PASS** on success, or **SK_DROP** on error. + */ +static int (*bpf_sk_redirect_map)(struct __sk_buff *skb, void *map, __u32 key, __u64 flags) = (void *) 52; + +/* + * bpf_sock_map_update + * + * Add an entry to, or update a *map* referencing sockets. The + * *skops* is used as a new value for the entry associated to + * *key*. *flags* is one of: + * + * **BPF_NOEXIST** + * The entry for *key* must not exist in the map. + * **BPF_EXIST** + * The entry for *key* must already exist in the map. + * **BPF_ANY** + * No condition on the existence of the entry for *key*. + * + * If the *map* has eBPF programs (parser and verdict), those will + * be inherited by the socket being added. If the socket is + * already attached to eBPF programs, this results in an error. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_sock_map_update)(struct bpf_sock_ops *skops, void *map, void *key, __u64 flags) = (void *) 53; + +/* + * bpf_xdp_adjust_meta + * + * Adjust the address pointed by *xdp_md*\ **->data_meta** by + * *delta* (which can be positive or negative). Note that this + * operation modifies the address stored in *xdp_md*\ **->data**, + * so the latter must be loaded only after the helper has been + * called. + * + * The use of *xdp_md*\ **->data_meta** is optional and programs + * are not required to use it. The rationale is that when the + * packet is processed with XDP (e.g. as DoS filter), it is + * possible to push further meta data along with it before passing + * to the stack, and to give the guarantee that an ingress eBPF + * program attached as a TC classifier on the same device can pick + * this up for further post-processing. Since TC works with socket + * buffers, it remains possible to set from XDP the **mark** or + * **priority** pointers, or other pointers for the socket buffer. + * Having this scratch space generic and programmable allows for + * more flexibility as the user is free to store whatever meta + * data they need. + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_xdp_adjust_meta)(struct xdp_md *xdp_md, int delta) = (void *) 54; + +/* + * bpf_perf_event_read_value + * + * Read the value of a perf event counter, and store it into *buf* + * of size *buf_size*. This helper relies on a *map* of type + * **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event + * counter is selected when *map* is updated with perf event file + * descriptors. The *map* is an array whose size is the number of + * available CPUs, and each cell contains a value relative to one + * CPU. The value to retrieve is indicated by *flags*, that + * contains the index of the CPU to look up, masked with + * **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to + * **BPF_F_CURRENT_CPU** to indicate that the value for the + * current CPU should be retrieved. + * + * This helper behaves in a way close to + * **bpf_perf_event_read**\ () helper, save that instead of + * just returning the value observed, it fills the *buf* + * structure. This allows for additional data to be retrieved: in + * particular, the enabled and running times (in *buf*\ + * **->enabled** and *buf*\ **->running**, respectively) are + * copied. In general, **bpf_perf_event_read_value**\ () is + * recommended over **bpf_perf_event_read**\ (), which has some + * ABI issues and provides fewer functionalities. + * + * These values are interesting, because hardware PMU (Performance + * Monitoring Unit) counters are limited resources. When there are + * more PMU based perf events opened than available counters, + * kernel will multiplex these events so each event gets certain + * percentage (but not all) of the PMU time. In case that + * multiplexing happens, the number of samples or counter value + * will not reflect the case compared to when no multiplexing + * occurs. This makes comparison between different runs difficult. + * Typically, the counter value should be normalized before + * comparing to other experiments. The usual normalization is done + * as follows. + * + * :: + * + * normalized_counter = counter * t_enabled / t_running + * + * Where t_enabled is the time enabled for event and t_running is + * the time running for event since last normalization. The + * enabled and running times are accumulated since the perf event + * open. To achieve scaling factor between two invocations of an + * eBPF program, users can can use CPU id as the key (which is + * typical for perf array usage model) to remember the previous + * value and do the calculation inside the eBPF program. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_perf_event_read_value)(void *map, __u64 flags, struct bpf_perf_event_value *buf, __u32 buf_size) = (void *) 55; + +/* + * bpf_perf_prog_read_value + * + * For en eBPF program attached to a perf event, retrieve the + * value of the event counter associated to *ctx* and store it in + * the structure pointed by *buf* and of size *buf_size*. Enabled + * and running times are also stored in the structure (see + * description of helper **bpf_perf_event_read_value**\ () for + * more details). + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_perf_prog_read_value)(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, __u32 buf_size) = (void *) 56; + +/* + * bpf_getsockopt + * + * Emulate a call to **getsockopt()** on the socket associated to + * *bpf_socket*, which must be a full socket. The *level* at + * which the option resides and the name *optname* of the option + * must be specified, see **getsockopt(2)** for more information. + * The retrieved value is stored in the structure pointed by + * *opval* and of length *optlen*. + * + * This helper actually implements a subset of **getsockopt()**. + * It supports the following *level*\ s: + * + * * **IPPROTO_TCP**, which supports *optname* + * **TCP_CONGESTION**. + * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. + * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_getsockopt)(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen) = (void *) 57; + +/* + * bpf_override_return + * + * Used for error injection, this helper uses kprobes to override + * the return value of the probed function, and to set it to *rc*. + * The first argument is the context *regs* on which the kprobe + * works. + * + * This helper works by setting setting the PC (program counter) + * to an override function which is run in place of the original + * probed function. This means the probed function is not run at + * all. The replacement function just returns with the required + * value. + * + * This helper has security implications, and thus is subject to + * restrictions. It is only available if the kernel was compiled + * with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration + * option, and in this case it only works on functions tagged with + * **ALLOW_ERROR_INJECTION** in the kernel code. + * + * Also, the helper is only available for the architectures having + * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing, + * x86 architecture is the only one to support this feature. + * + * Returns + * 0 + */ +static int (*bpf_override_return)(struct pt_regs *regs, __u64 rc) = (void *) 58; + +/* + * bpf_sock_ops_cb_flags_set + * + * Attempt to set the value of the **bpf_sock_ops_cb_flags** field + * for the full TCP socket associated to *bpf_sock_ops* to + * *argval*. + * + * The primary use of this field is to determine if there should + * be calls to eBPF programs of type + * **BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP + * code. A program of the same type can change its value, per + * connection and as necessary, when the connection is + * established. This field is directly accessible for reading, but + * this helper must be used for updates in order to return an + * error if an eBPF program tries to set a callback that is not + * supported in the current kernel. + * + * *argval* is a flag array which can combine these flags: + * + * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out) + * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission) + * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change) + * * **BPF_SOCK_OPS_RTT_CB_FLAG** (every RTT) + * + * Therefore, this function can be used to clear a callback flag by + * setting the appropriate bit to zero. e.g. to disable the RTO + * callback: + * + * **bpf_sock_ops_cb_flags_set(bpf_sock,** + * **bpf_sock->bpf_sock_ops_cb_flags & ~BPF_SOCK_OPS_RTO_CB_FLAG)** + * + * Here are some examples of where one could call such eBPF + * program: + * + * * When RTO fires. + * * When a packet is retransmitted. + * * When the connection terminates. + * * When a packet is sent. + * * When a packet is received. + * + * Returns + * Code **-EINVAL** if the socket is not a full TCP socket; + * otherwise, a positive number containing the bits that could not + * be set is returned (which comes down to 0 if all bits were set + * as required). + */ +static int (*bpf_sock_ops_cb_flags_set)(struct bpf_sock_ops *bpf_sock, int argval) = (void *) 59; + +/* + * bpf_msg_redirect_map + * + * This helper is used in programs implementing policies at the + * socket level. If the message *msg* is allowed to pass (i.e. if + * the verdict eBPF program returns **SK_PASS**), redirect it to + * the socket referenced by *map* (of type + * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and + * egress interfaces can be used for redirection. The + * **BPF_F_INGRESS** value in *flags* is used to make the + * distinction (ingress path is selected if the flag is present, + * egress path otherwise). This is the only flag supported for now. + * + * Returns + * **SK_PASS** on success, or **SK_DROP** on error. + */ +static int (*bpf_msg_redirect_map)(struct sk_msg_md *msg, void *map, __u32 key, __u64 flags) = (void *) 60; + +/* + * bpf_msg_apply_bytes + * + * For socket policies, apply the verdict of the eBPF program to + * the next *bytes* (number of bytes) of message *msg*. + * + * For example, this helper can be used in the following cases: + * + * * A single **sendmsg**\ () or **sendfile**\ () system call + * contains multiple logical messages that the eBPF program is + * supposed to read and for which it should apply a verdict. + * * An eBPF program only cares to read the first *bytes* of a + * *msg*. If the message has a large payload, then setting up + * and calling the eBPF program repeatedly for all bytes, even + * though the verdict is already known, would create unnecessary + * overhead. + * + * When called from within an eBPF program, the helper sets a + * counter internal to the BPF infrastructure, that is used to + * apply the last verdict to the next *bytes*. If *bytes* is + * smaller than the current data being processed from a + * **sendmsg**\ () or **sendfile**\ () system call, the first + * *bytes* will be sent and the eBPF program will be re-run with + * the pointer for start of data pointing to byte number *bytes* + * **+ 1**. If *bytes* is larger than the current data being + * processed, then the eBPF verdict will be applied to multiple + * **sendmsg**\ () or **sendfile**\ () calls until *bytes* are + * consumed. + * + * Note that if a socket closes with the internal counter holding + * a non-zero value, this is not a problem because data is not + * being buffered for *bytes* and is sent as it is received. + * + * Returns + * 0 + */ +static int (*bpf_msg_apply_bytes)(struct sk_msg_md *msg, __u32 bytes) = (void *) 61; + +/* + * bpf_msg_cork_bytes + * + * For socket policies, prevent the execution of the verdict eBPF + * program for message *msg* until *bytes* (byte number) have been + * accumulated. + * + * This can be used when one needs a specific number of bytes + * before a verdict can be assigned, even if the data spans + * multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme + * case would be a user calling **sendmsg**\ () repeatedly with + * 1-byte long message segments. Obviously, this is bad for + * performance, but it is still valid. If the eBPF program needs + * *bytes* bytes to validate a header, this helper can be used to + * prevent the eBPF program to be called again until *bytes* have + * been accumulated. + * + * Returns + * 0 + */ +static int (*bpf_msg_cork_bytes)(struct sk_msg_md *msg, __u32 bytes) = (void *) 62; + +/* + * bpf_msg_pull_data + * + * For socket policies, pull in non-linear data from user space + * for *msg* and set pointers *msg*\ **->data** and *msg*\ + * **->data_end** to *start* and *end* bytes offsets into *msg*, + * respectively. + * + * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a + * *msg* it can only parse data that the (**data**, **data_end**) + * pointers have already consumed. For **sendmsg**\ () hooks this + * is likely the first scatterlist element. But for calls relying + * on the **sendpage** handler (e.g. **sendfile**\ ()) this will + * be the range (**0**, **0**) because the data is shared with + * user space and by default the objective is to avoid allowing + * user space to modify data while (or after) eBPF verdict is + * being decided. This helper can be used to pull in data and to + * set the start and end pointer to given values. Data will be + * copied if necessary (i.e. if data was not linear and if start + * and end pointers do not point to the same chunk). + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_msg_pull_data)(struct sk_msg_md *msg, __u32 start, __u32 end, __u64 flags) = (void *) 63; + +/* + * bpf_bind + * + * Bind the socket associated to *ctx* to the address pointed by + * *addr*, of length *addr_len*. This allows for making outgoing + * connection from the desired IP address, which can be useful for + * example when all processes inside a cgroup should use one + * single IP address on a host that has multiple IP configured. + * + * This helper works for IPv4 and IPv6, TCP and UDP sockets. The + * domain (*addr*\ **->sa_family**) must be **AF_INET** (or + * **AF_INET6**). Looking for a free port to bind to can be + * expensive, therefore binding to port is not permitted by the + * helper: *addr*\ **->sin_port** (or **sin6_port**, respectively) + * must be set to zero. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_bind)(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) = (void *) 64; + +/* + * bpf_xdp_adjust_tail + * + * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is + * only possible to shrink the packet as of this writing, + * therefore *delta* must be a negative integer. + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_xdp_adjust_tail)(struct xdp_md *xdp_md, int delta) = (void *) 65; + +/* + * bpf_skb_get_xfrm_state + * + * Retrieve the XFRM state (IP transform framework, see also + * **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*. + * + * The retrieved value is stored in the **struct bpf_xfrm_state** + * pointed by *xfrm_state* and of length *size*. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_XFRM** configuration option. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_skb_get_xfrm_state)(struct __sk_buff *skb, __u32 index, struct bpf_xfrm_state *xfrm_state, __u32 size, __u64 flags) = (void *) 66; + +/* + * bpf_get_stack + * + * Return a user or a kernel stack in bpf program provided buffer. + * To achieve this, the helper needs *ctx*, which is a pointer + * to the context on which the tracing program is executed. + * To store the stacktrace, the bpf program provides *buf* with + * a nonnegative *size*. + * + * The last argument, *flags*, holds the number of stack frames to + * skip (from 0 to 255), masked with + * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set + * the following flags: + * + * **BPF_F_USER_STACK** + * Collect a user space stack instead of a kernel stack. + * **BPF_F_USER_BUILD_ID** + * Collect buildid+offset instead of ips for user stack, + * only valid if **BPF_F_USER_STACK** is also specified. + * + * **bpf_get_stack**\ () can collect up to + * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject + * to sufficient large buffer size. Note that + * this limit can be controlled with the **sysctl** program, and + * that it should be manually increased in order to profile long + * user stacks (such as stacks for Java programs). To do so, use: + * + * :: + * + * # sysctl kernel.perf_event_max_stack=<new value> + * + * Returns + * A non-negative value equal to or less than *size* on success, + * or a negative error in case of failure. + */ +static int (*bpf_get_stack)(void *ctx, void *buf, __u32 size, __u64 flags) = (void *) 67; + +/* + * bpf_skb_load_bytes_relative + * + * This helper is similar to **bpf_skb_load_bytes**\ () in that + * it provides an easy way to load *len* bytes from *offset* + * from the packet associated to *skb*, into the buffer pointed + * by *to*. The difference to **bpf_skb_load_bytes**\ () is that + * a fifth argument *start_header* exists in order to select a + * base offset to start from. *start_header* can be one of: + * + * **BPF_HDR_START_MAC** + * Base offset to load data from is *skb*'s mac header. + * **BPF_HDR_START_NET** + * Base offset to load data from is *skb*'s network header. + * + * In general, "direct packet access" is the preferred method to + * access packet data, however, this helper is in particular useful + * in socket filters where *skb*\ **->data** does not always point + * to the start of the mac header and where "direct packet access" + * is not available. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_skb_load_bytes_relative)(const void *skb, __u32 offset, void *to, __u32 len, __u32 start_header) = (void *) 68; + +/* + * bpf_fib_lookup + * + * Do FIB lookup in kernel tables using parameters in *params*. + * If lookup is successful and result shows packet is to be + * forwarded, the neighbor tables are searched for the nexthop. + * If successful (ie., FIB lookup shows forwarding and nexthop + * is resolved), the nexthop address is returned in ipv4_dst + * or ipv6_dst based on family, smac is set to mac address of + * egress device, dmac is set to nexthop mac address, rt_metric + * is set to metric from route (IPv4/IPv6 only), and ifindex + * is set to the device index of the nexthop from the FIB lookup. + * + * *plen* argument is the size of the passed in struct. + * *flags* argument can be a combination of one or more of the + * following values: + * + * **BPF_FIB_LOOKUP_DIRECT** + * Do a direct table lookup vs full lookup using FIB + * rules. + * **BPF_FIB_LOOKUP_OUTPUT** + * Perform lookup from an egress perspective (default is + * ingress). + * + * *ctx* is either **struct xdp_md** for XDP programs or + * **struct sk_buff** tc cls_act programs. + * + * Returns + * * < 0 if any input argument is invalid + * * 0 on success (packet is forwarded, nexthop neighbor exists) + * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the + * packet is not forwarded or needs assist from full stack + */ +static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params, int plen, __u32 flags) = (void *) 69; + +/* + * bpf_sock_hash_update + * + * Add an entry to, or update a sockhash *map* referencing sockets. + * The *skops* is used as a new value for the entry associated to + * *key*. *flags* is one of: + * + * **BPF_NOEXIST** + * The entry for *key* must not exist in the map. + * **BPF_EXIST** + * The entry for *key* must already exist in the map. + * **BPF_ANY** + * No condition on the existence of the entry for *key*. + * + * If the *map* has eBPF programs (parser and verdict), those will + * be inherited by the socket being added. If the socket is + * already attached to eBPF programs, this results in an error. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_sock_hash_update)(struct bpf_sock_ops *skops, void *map, void *key, __u64 flags) = (void *) 70; + +/* + * bpf_msg_redirect_hash + * + * This helper is used in programs implementing policies at the + * socket level. If the message *msg* is allowed to pass (i.e. if + * the verdict eBPF program returns **SK_PASS**), redirect it to + * the socket referenced by *map* (of type + * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and + * egress interfaces can be used for redirection. The + * **BPF_F_INGRESS** value in *flags* is used to make the + * distinction (ingress path is selected if the flag is present, + * egress path otherwise). This is the only flag supported for now. + * + * Returns + * **SK_PASS** on success, or **SK_DROP** on error. + */ +static int (*bpf_msg_redirect_hash)(struct sk_msg_md *msg, void *map, void *key, __u64 flags) = (void *) 71; + +/* + * bpf_sk_redirect_hash + * + * This helper is used in programs implementing policies at the + * skb socket level. If the sk_buff *skb* is allowed to pass (i.e. + * if the verdeict eBPF program returns **SK_PASS**), redirect it + * to the socket referenced by *map* (of type + * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and + * egress interfaces can be used for redirection. The + * **BPF_F_INGRESS** value in *flags* is used to make the + * distinction (ingress path is selected if the flag is present, + * egress otherwise). This is the only flag supported for now. + * + * Returns + * **SK_PASS** on success, or **SK_DROP** on error. + */ +static int (*bpf_sk_redirect_hash)(struct __sk_buff *skb, void *map, void *key, __u64 flags) = (void *) 72; + +/* + * bpf_lwt_push_encap + * + * Encapsulate the packet associated to *skb* within a Layer 3 + * protocol header. This header is provided in the buffer at + * address *hdr*, with *len* its size in bytes. *type* indicates + * the protocol of the header and can be one of: + * + * **BPF_LWT_ENCAP_SEG6** + * IPv6 encapsulation with Segment Routing Header + * (**struct ipv6_sr_hdr**). *hdr* only contains the SRH, + * the IPv6 header is computed by the kernel. + * **BPF_LWT_ENCAP_SEG6_INLINE** + * Only works if *skb* contains an IPv6 packet. Insert a + * Segment Routing Header (**struct ipv6_sr_hdr**) inside + * the IPv6 header. + * **BPF_LWT_ENCAP_IP** + * IP encapsulation (GRE/GUE/IPIP/etc). The outer header + * must be IPv4 or IPv6, followed by zero or more + * additional headers, up to **LWT_BPF_MAX_HEADROOM** + * total bytes in all prepended headers. Please note that + * if **skb_is_gso**\ (*skb*) is true, no more than two + * headers can be prepended, and the inner header, if + * present, should be either GRE or UDP/GUE. + * + * **BPF_LWT_ENCAP_SEG6**\ \* types can be called by BPF programs + * of type **BPF_PROG_TYPE_LWT_IN**; **BPF_LWT_ENCAP_IP** type can + * be called by bpf programs of types **BPF_PROG_TYPE_LWT_IN** and + * **BPF_PROG_TYPE_LWT_XMIT**. + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_lwt_push_encap)(struct __sk_buff *skb, __u32 type, void *hdr, __u32 len) = (void *) 73; + +/* + * bpf_lwt_seg6_store_bytes + * + * Store *len* bytes from address *from* into the packet + * associated to *skb*, at *offset*. Only the flags, tag and TLVs + * inside the outermost IPv6 Segment Routing Header can be + * modified through this helper. + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_lwt_seg6_store_bytes)(struct __sk_buff *skb, __u32 offset, const void *from, __u32 len) = (void *) 74; + +/* + * bpf_lwt_seg6_adjust_srh + * + * Adjust the size allocated to TLVs in the outermost IPv6 + * Segment Routing Header contained in the packet associated to + * *skb*, at position *offset* by *delta* bytes. Only offsets + * after the segments are accepted. *delta* can be as well + * positive (growing) as negative (shrinking). + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_lwt_seg6_adjust_srh)(struct __sk_buff *skb, __u32 offset, __s32 delta) = (void *) 75; + +/* + * bpf_lwt_seg6_action + * + * Apply an IPv6 Segment Routing action of type *action* to the + * packet associated to *skb*. Each action takes a parameter + * contained at address *param*, and of length *param_len* bytes. + * *action* can be one of: + * + * **SEG6_LOCAL_ACTION_END_X** + * End.X action: Endpoint with Layer-3 cross-connect. + * Type of *param*: **struct in6_addr**. + * **SEG6_LOCAL_ACTION_END_T** + * End.T action: Endpoint with specific IPv6 table lookup. + * Type of *param*: **int**. + * **SEG6_LOCAL_ACTION_END_B6** + * End.B6 action: Endpoint bound to an SRv6 policy. + * Type of *param*: **struct ipv6_sr_hdr**. + * **SEG6_LOCAL_ACTION_END_B6_ENCAP** + * End.B6.Encap action: Endpoint bound to an SRv6 + * encapsulation policy. + * Type of *param*: **struct ipv6_sr_hdr**. + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_lwt_seg6_action)(struct __sk_buff *skb, __u32 action, void *param, __u32 param_len) = (void *) 76; + +/* + * bpf_rc_repeat + * + * This helper is used in programs implementing IR decoding, to + * report a successfully decoded repeat key message. This delays + * the generation of a key up event for previously generated + * key down event. + * + * Some IR protocols like NEC have a special IR message for + * repeating last button, for when a button is held down. + * + * The *ctx* should point to the lirc sample as passed into + * the program. + * + * This helper is only available is the kernel was compiled with + * the **CONFIG_BPF_LIRC_MODE2** configuration option set to + * "**y**". + * + * Returns + * 0 + */ +static int (*bpf_rc_repeat)(void *ctx) = (void *) 77; + +/* + * bpf_rc_keydown + * + * This helper is used in programs implementing IR decoding, to + * report a successfully decoded key press with *scancode*, + * *toggle* value in the given *protocol*. The scancode will be + * translated to a keycode using the rc keymap, and reported as + * an input key down event. After a period a key up event is + * generated. This period can be extended by calling either + * **bpf_rc_keydown**\ () again with the same values, or calling + * **bpf_rc_repeat**\ (). + * + * Some protocols include a toggle bit, in case the button was + * released and pressed again between consecutive scancodes. + * + * The *ctx* should point to the lirc sample as passed into + * the program. + * + * The *protocol* is the decoded protocol number (see + * **enum rc_proto** for some predefined values). + * + * This helper is only available is the kernel was compiled with + * the **CONFIG_BPF_LIRC_MODE2** configuration option set to + * "**y**". + * + * Returns + * 0 + */ +static int (*bpf_rc_keydown)(void *ctx, __u32 protocol, __u64 scancode, __u32 toggle) = (void *) 78; + +/* + * bpf_skb_cgroup_id + * + * Return the cgroup v2 id of the socket associated with the *skb*. + * This is roughly similar to the **bpf_get_cgroup_classid**\ () + * helper for cgroup v1 by providing a tag resp. identifier that + * can be matched on or used for map lookups e.g. to implement + * policy. The cgroup v2 id of a given path in the hierarchy is + * exposed in user space through the f_handle API in order to get + * to the same 64-bit id. + * + * This helper can be used on TC egress path, but not on ingress, + * and is available only if the kernel was compiled with the + * **CONFIG_SOCK_CGROUP_DATA** configuration option. + * + * Returns + * The id is returned or 0 in case the id could not be retrieved. + */ +static __u64 (*bpf_skb_cgroup_id)(struct __sk_buff *skb) = (void *) 79; + +/* + * bpf_get_current_cgroup_id + * + * + * Returns + * A 64-bit integer containing the current cgroup id based + * on the cgroup within which the current task is running. + */ +static __u64 (*bpf_get_current_cgroup_id)(void) = (void *) 80; + +/* + * bpf_get_local_storage + * + * Get the pointer to the local storage area. + * The type and the size of the local storage is defined + * by the *map* argument. + * The *flags* meaning is specific for each map type, + * and has to be 0 for cgroup local storage. + * + * Depending on the BPF program type, a local storage area + * can be shared between multiple instances of the BPF program, + * running simultaneously. + * + * A user should care about the synchronization by himself. + * For example, by using the **BPF_STX_XADD** instruction to alter + * the shared data. + * + * Returns + * A pointer to the local storage area. + */ +static void *(*bpf_get_local_storage)(void *map, __u64 flags) = (void *) 81; + +/* + * bpf_sk_select_reuseport + * + * Select a **SO_REUSEPORT** socket from a + * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. + * It checks the selected socket is matching the incoming + * request in the socket buffer. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_sk_select_reuseport)(struct sk_reuseport_md *reuse, void *map, void *key, __u64 flags) = (void *) 82; + +/* + * bpf_skb_ancestor_cgroup_id + * + * Return id of cgroup v2 that is ancestor of cgroup associated + * with the *skb* at the *ancestor_level*. The root cgroup is at + * *ancestor_level* zero and each step down the hierarchy + * increments the level. If *ancestor_level* == level of cgroup + * associated with *skb*, then return value will be same as that + * of **bpf_skb_cgroup_id**\ (). + * + * The helper is useful to implement policies based on cgroups + * that are upper in hierarchy than immediate cgroup associated + * with *skb*. + * + * The format of returned id and helper limitations are same as in + * **bpf_skb_cgroup_id**\ (). + * + * Returns + * The id is returned or 0 in case the id could not be retrieved. + */ +static __u64 (*bpf_skb_ancestor_cgroup_id)(struct __sk_buff *skb, int ancestor_level) = (void *) 83; + +/* + * bpf_sk_lookup_tcp + * + * Look for TCP socket matching *tuple*, optionally in a child + * network namespace *netns*. The return value must be checked, + * and if non-**NULL**, released via **bpf_sk_release**\ (). + * + * The *ctx* should point to the context of the program, such as + * the skb or socket (depending on the hook in use). This is used + * to determine the base network namespace for the lookup. + * + * *tuple_size* must be one of: + * + * **sizeof**\ (*tuple*\ **->ipv4**) + * Look for an IPv4 socket. + * **sizeof**\ (*tuple*\ **->ipv6**) + * Look for an IPv6 socket. + * + * If the *netns* is a negative signed 32-bit integer, then the + * socket lookup table in the netns associated with the *ctx* will + * will be used. For the TC hooks, this is the netns of the device + * in the skb. For socket hooks, this is the netns of the socket. + * If *netns* is any other signed 32-bit value greater than or + * equal to zero then it specifies the ID of the netns relative to + * the netns associated with the *ctx*. *netns* values beyond the + * range of 32-bit integers are reserved for future use. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_NET** configuration option. + * + * Returns + * Pointer to **struct bpf_sock**, or **NULL** in case of failure. + * For sockets with reuseport option, the **struct bpf_sock** + * result is from *reuse*\ **->socks**\ [] using the hash of the + * tuple. + */ +static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx, struct bpf_sock_tuple *tuple, __u32 tuple_size, __u64 netns, __u64 flags) = (void *) 84; + +/* + * bpf_sk_lookup_udp + * + * Look for UDP socket matching *tuple*, optionally in a child + * network namespace *netns*. The return value must be checked, + * and if non-**NULL**, released via **bpf_sk_release**\ (). + * + * The *ctx* should point to the context of the program, such as + * the skb or socket (depending on the hook in use). This is used + * to determine the base network namespace for the lookup. + * + * *tuple_size* must be one of: + * + * **sizeof**\ (*tuple*\ **->ipv4**) + * Look for an IPv4 socket. + * **sizeof**\ (*tuple*\ **->ipv6**) + * Look for an IPv6 socket. + * + * If the *netns* is a negative signed 32-bit integer, then the + * socket lookup table in the netns associated with the *ctx* will + * will be used. For the TC hooks, this is the netns of the device + * in the skb. For socket hooks, this is the netns of the socket. + * If *netns* is any other signed 32-bit value greater than or + * equal to zero then it specifies the ID of the netns relative to + * the netns associated with the *ctx*. *netns* values beyond the + * range of 32-bit integers are reserved for future use. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_NET** configuration option. + * + * Returns + * Pointer to **struct bpf_sock**, or **NULL** in case of failure. + * For sockets with reuseport option, the **struct bpf_sock** + * result is from *reuse*\ **->socks**\ [] using the hash of the + * tuple. + */ +static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx, struct bpf_sock_tuple *tuple, __u32 tuple_size, __u64 netns, __u64 flags) = (void *) 85; + +/* + * bpf_sk_release + * + * Release the reference held by *sock*. *sock* must be a + * non-**NULL** pointer that was returned from + * **bpf_sk_lookup_xxx**\ (). + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_sk_release)(struct bpf_sock *sock) = (void *) 86; + +/* + * bpf_map_push_elem + * + * Push an element *value* in *map*. *flags* is one of: + * + * **BPF_EXIST** + * If the queue/stack is full, the oldest element is + * removed to make room for this. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_map_push_elem)(void *map, const void *value, __u64 flags) = (void *) 87; + +/* + * bpf_map_pop_elem + * + * Pop an element from *map*. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_map_pop_elem)(void *map, void *value) = (void *) 88; + +/* + * bpf_map_peek_elem + * + * Get an element from *map* without removing it. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_map_peek_elem)(void *map, void *value) = (void *) 89; + +/* + * bpf_msg_push_data + * + * For socket policies, insert *len* bytes into *msg* at offset + * *start*. + * + * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a + * *msg* it may want to insert metadata or options into the *msg*. + * This can later be read and used by any of the lower layer BPF + * hooks. + * + * This helper may fail if under memory pressure (a malloc + * fails) in these cases BPF programs will get an appropriate + * error and BPF programs will need to handle them. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_msg_push_data)(struct sk_msg_md *msg, __u32 start, __u32 len, __u64 flags) = (void *) 90; + +/* + * bpf_msg_pop_data + * + * Will remove *len* bytes from a *msg* starting at byte *start*. + * This may result in **ENOMEM** errors under certain situations if + * an allocation and copy are required due to a full ring buffer. + * However, the helper will try to avoid doing the allocation + * if possible. Other errors can occur if input parameters are + * invalid either due to *start* byte not being valid part of *msg* + * payload and/or *pop* value being to large. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_msg_pop_data)(struct sk_msg_md *msg, __u32 start, __u32 len, __u64 flags) = (void *) 91; + +/* + * bpf_rc_pointer_rel + * + * This helper is used in programs implementing IR decoding, to + * report a successfully decoded pointer movement. + * + * The *ctx* should point to the lirc sample as passed into + * the program. + * + * This helper is only available is the kernel was compiled with + * the **CONFIG_BPF_LIRC_MODE2** configuration option set to + * "**y**". + * + * Returns + * 0 + */ +static int (*bpf_rc_pointer_rel)(void *ctx, __s32 rel_x, __s32 rel_y) = (void *) 92; + +/* + * bpf_spin_lock + * + * Acquire a spinlock represented by the pointer *lock*, which is + * stored as part of a value of a map. Taking the lock allows to + * safely update the rest of the fields in that value. The + * spinlock can (and must) later be released with a call to + * **bpf_spin_unlock**\ (\ *lock*\ ). + * + * Spinlocks in BPF programs come with a number of restrictions + * and constraints: + * + * * **bpf_spin_lock** objects are only allowed inside maps of + * types **BPF_MAP_TYPE_HASH** and **BPF_MAP_TYPE_ARRAY** (this + * list could be extended in the future). + * * BTF description of the map is mandatory. + * * The BPF program can take ONE lock at a time, since taking two + * or more could cause dead locks. + * * Only one **struct bpf_spin_lock** is allowed per map element. + * * When the lock is taken, calls (either BPF to BPF or helpers) + * are not allowed. + * * The **BPF_LD_ABS** and **BPF_LD_IND** instructions are not + * allowed inside a spinlock-ed region. + * * The BPF program MUST call **bpf_spin_unlock**\ () to release + * the lock, on all execution paths, before it returns. + * * The BPF program can access **struct bpf_spin_lock** only via + * the **bpf_spin_lock**\ () and **bpf_spin_unlock**\ () + * helpers. Loading or storing data into the **struct + * bpf_spin_lock** *lock*\ **;** field of a map is not allowed. + * * To use the **bpf_spin_lock**\ () helper, the BTF description + * of the map value must be a struct and have **struct + * bpf_spin_lock** *anyname*\ **;** field at the top level. + * Nested lock inside another struct is not allowed. + * * The **struct bpf_spin_lock** *lock* field in a map value must + * be aligned on a multiple of 4 bytes in that value. + * * Syscall with command **BPF_MAP_LOOKUP_ELEM** does not copy + * the **bpf_spin_lock** field to user space. + * * Syscall with command **BPF_MAP_UPDATE_ELEM**, or update from + * a BPF program, do not update the **bpf_spin_lock** field. + * * **bpf_spin_lock** cannot be on the stack or inside a + * networking packet (it can only be inside of a map values). + * * **bpf_spin_lock** is available to root only. + * * Tracing programs and socket filter programs cannot use + * **bpf_spin_lock**\ () due to insufficient preemption checks + * (but this may change in the future). + * * **bpf_spin_lock** is not allowed in inner maps of map-in-map. + * + * Returns + * 0 + */ +static int (*bpf_spin_lock)(struct bpf_spin_lock *lock) = (void *) 93; + +/* + * bpf_spin_unlock + * + * Release the *lock* previously locked by a call to + * **bpf_spin_lock**\ (\ *lock*\ ). + * + * Returns + * 0 + */ +static int (*bpf_spin_unlock)(struct bpf_spin_lock *lock) = (void *) 94; + +/* + * bpf_sk_fullsock + * + * This helper gets a **struct bpf_sock** pointer such + * that all the fields in this **bpf_sock** can be accessed. + * + * Returns + * A **struct bpf_sock** pointer on success, or **NULL** in + * case of failure. + */ +static struct bpf_sock *(*bpf_sk_fullsock)(struct bpf_sock *sk) = (void *) 95; + +/* + * bpf_tcp_sock + * + * This helper gets a **struct bpf_tcp_sock** pointer from a + * **struct bpf_sock** pointer. + * + * Returns + * A **struct bpf_tcp_sock** pointer on success, or **NULL** in + * case of failure. + */ +static struct bpf_tcp_sock *(*bpf_tcp_sock)(struct bpf_sock *sk) = (void *) 96; + +/* + * bpf_skb_ecn_set_ce + * + * Set ECN (Explicit Congestion Notification) field of IP header + * to **CE** (Congestion Encountered) if current value is **ECT** + * (ECN Capable Transport). Otherwise, do nothing. Works with IPv6 + * and IPv4. + * + * Returns + * 1 if the **CE** flag is set (either by the current helper call + * or because it was already present), 0 if it is not set. + */ +static int (*bpf_skb_ecn_set_ce)(struct __sk_buff *skb) = (void *) 97; + +/* + * bpf_get_listener_sock + * + * Return a **struct bpf_sock** pointer in **TCP_LISTEN** state. + * **bpf_sk_release**\ () is unnecessary and not allowed. + * + * Returns + * A **struct bpf_sock** pointer on success, or **NULL** in + * case of failure. + */ +static struct bpf_sock *(*bpf_get_listener_sock)(struct bpf_sock *sk) = (void *) 98; + +/* + * bpf_skc_lookup_tcp + * + * Look for TCP socket matching *tuple*, optionally in a child + * network namespace *netns*. The return value must be checked, + * and if non-**NULL**, released via **bpf_sk_release**\ (). + * + * This function is identical to **bpf_sk_lookup_tcp**\ (), except + * that it also returns timewait or request sockets. Use + * **bpf_sk_fullsock**\ () or **bpf_tcp_sock**\ () to access the + * full structure. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_NET** configuration option. + * + * Returns + * Pointer to **struct bpf_sock**, or **NULL** in case of failure. + * For sockets with reuseport option, the **struct bpf_sock** + * result is from *reuse*\ **->socks**\ [] using the hash of the + * tuple. + */ +static struct bpf_sock *(*bpf_skc_lookup_tcp)(void *ctx, struct bpf_sock_tuple *tuple, __u32 tuple_size, __u64 netns, __u64 flags) = (void *) 99; + +/* + * bpf_tcp_check_syncookie + * + * Check whether *iph* and *th* contain a valid SYN cookie ACK for + * the listening socket in *sk*. + * + * *iph* points to the start of the IPv4 or IPv6 header, while + * *iph_len* contains **sizeof**\ (**struct iphdr**) or + * **sizeof**\ (**struct ip6hdr**). + * + * *th* points to the start of the TCP header, while *th_len* + * contains **sizeof**\ (**struct tcphdr**). + * + * + * Returns + * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative + * error otherwise. + */ +static int (*bpf_tcp_check_syncookie)(struct bpf_sock *sk, void *iph, __u32 iph_len, struct tcphdr *th, __u32 th_len) = (void *) 100; + +/* + * bpf_sysctl_get_name + * + * Get name of sysctl in /proc/sys/ and copy it into provided by + * program buffer *buf* of size *buf_len*. + * + * The buffer is always NUL terminated, unless it's zero-sized. + * + * If *flags* is zero, full name (e.g. "net/ipv4/tcp_mem") is + * copied. Use **BPF_F_SYSCTL_BASE_NAME** flag to copy base name + * only (e.g. "tcp_mem"). + * + * Returns + * Number of character copied (not including the trailing NUL). + * + * **-E2BIG** if the buffer wasn't big enough (*buf* will contain + * truncated name in this case). + */ +static int (*bpf_sysctl_get_name)(struct bpf_sysctl *ctx, char *buf, unsigned long buf_len, __u64 flags) = (void *) 101; + +/* + * bpf_sysctl_get_current_value + * + * Get current value of sysctl as it is presented in /proc/sys + * (incl. newline, etc), and copy it as a string into provided + * by program buffer *buf* of size *buf_len*. + * + * The whole value is copied, no matter what file position user + * space issued e.g. sys_read at. + * + * The buffer is always NUL terminated, unless it's zero-sized. + * + * Returns + * Number of character copied (not including the trailing NUL). + * + * **-E2BIG** if the buffer wasn't big enough (*buf* will contain + * truncated name in this case). + * + * **-EINVAL** if current value was unavailable, e.g. because + * sysctl is uninitialized and read returns -EIO for it. + */ +static int (*bpf_sysctl_get_current_value)(struct bpf_sysctl *ctx, char *buf, unsigned long buf_len) = (void *) 102; + +/* + * bpf_sysctl_get_new_value + * + * Get new value being written by user space to sysctl (before + * the actual write happens) and copy it as a string into + * provided by program buffer *buf* of size *buf_len*. + * + * User space may write new value at file position > 0. + * + * The buffer is always NUL terminated, unless it's zero-sized. + * + * Returns + * Number of character copied (not including the trailing NUL). + * + * **-E2BIG** if the buffer wasn't big enough (*buf* will contain + * truncated name in this case). + * + * **-EINVAL** if sysctl is being read. + */ +static int (*bpf_sysctl_get_new_value)(struct bpf_sysctl *ctx, char *buf, unsigned long buf_len) = (void *) 103; + +/* + * bpf_sysctl_set_new_value + * + * Override new value being written by user space to sysctl with + * value provided by program in buffer *buf* of size *buf_len*. + * + * *buf* should contain a string in same form as provided by user + * space on sysctl write. + * + * User space may write new value at file position > 0. To override + * the whole sysctl value file position should be set to zero. + * + * Returns + * 0 on success. + * + * **-E2BIG** if the *buf_len* is too big. + * + * **-EINVAL** if sysctl is being read. + */ +static int (*bpf_sysctl_set_new_value)(struct bpf_sysctl *ctx, const char *buf, unsigned long buf_len) = (void *) 104; + +/* + * bpf_strtol + * + * Convert the initial part of the string from buffer *buf* of + * size *buf_len* to a long integer according to the given base + * and save the result in *res*. + * + * The string may begin with an arbitrary amount of white space + * (as determined by **isspace**\ (3)) followed by a single + * optional '**-**' sign. + * + * Five least significant bits of *flags* encode base, other bits + * are currently unused. + * + * Base must be either 8, 10, 16 or 0 to detect it automatically + * similar to user space **strtol**\ (3). + * + * Returns + * Number of characters consumed on success. Must be positive but + * no more than *buf_len*. + * + * **-EINVAL** if no valid digits were found or unsupported base + * was provided. + * + * **-ERANGE** if resulting value was out of range. + */ +static int (*bpf_strtol)(const char *buf, unsigned long buf_len, __u64 flags, long *res) = (void *) 105; + +/* + * bpf_strtoul + * + * Convert the initial part of the string from buffer *buf* of + * size *buf_len* to an unsigned long integer according to the + * given base and save the result in *res*. + * + * The string may begin with an arbitrary amount of white space + * (as determined by **isspace**\ (3)). + * + * Five least significant bits of *flags* encode base, other bits + * are currently unused. + * + * Base must be either 8, 10, 16 or 0 to detect it automatically + * similar to user space **strtoul**\ (3). + * + * Returns + * Number of characters consumed on success. Must be positive but + * no more than *buf_len*. + * + * **-EINVAL** if no valid digits were found or unsupported base + * was provided. + * + * **-ERANGE** if resulting value was out of range. + */ +static int (*bpf_strtoul)(const char *buf, unsigned long buf_len, __u64 flags, unsigned long *res) = (void *) 106; + +/* + * bpf_sk_storage_get + * + * Get a bpf-local-storage from a *sk*. + * + * Logically, it could be thought of getting the value from + * a *map* with *sk* as the **key**. From this + * perspective, the usage is not much different from + * **bpf_map_lookup_elem**\ (*map*, **&**\ *sk*) except this + * helper enforces the key must be a full socket and the map must + * be a **BPF_MAP_TYPE_SK_STORAGE** also. + * + * Underneath, the value is stored locally at *sk* instead of + * the *map*. The *map* is used as the bpf-local-storage + * "type". The bpf-local-storage "type" (i.e. the *map*) is + * searched against all bpf-local-storages residing at *sk*. + * + * An optional *flags* (**BPF_SK_STORAGE_GET_F_CREATE**) can be + * used such that a new bpf-local-storage will be + * created if one does not exist. *value* can be used + * together with **BPF_SK_STORAGE_GET_F_CREATE** to specify + * the initial value of a bpf-local-storage. If *value* is + * **NULL**, the new bpf-local-storage will be zero initialized. + * + * Returns + * A bpf-local-storage pointer is returned on success. + * + * **NULL** if not found or there was an error in adding + * a new bpf-local-storage. + */ +static void *(*bpf_sk_storage_get)(void *map, struct bpf_sock *sk, void *value, __u64 flags) = (void *) 107; + +/* + * bpf_sk_storage_delete + * + * Delete a bpf-local-storage from a *sk*. + * + * Returns + * 0 on success. + * + * **-ENOENT** if the bpf-local-storage cannot be found. + */ +static int (*bpf_sk_storage_delete)(void *map, struct bpf_sock *sk) = (void *) 108; + +/* + * bpf_send_signal + * + * Send signal *sig* to the current task. + * + * Returns + * 0 on success or successfully queued. + * + * **-EBUSY** if work queue under nmi is full. + * + * **-EINVAL** if *sig* is invalid. + * + * **-EPERM** if no permission to send the *sig*. + * + * **-EAGAIN** if bpf program can try again. + */ +static int (*bpf_send_signal)(__u32 sig) = (void *) 109; + +/* + * bpf_tcp_gen_syncookie + * + * Try to issue a SYN cookie for the packet with corresponding + * IP/TCP headers, *iph* and *th*, on the listening socket in *sk*. + * + * *iph* points to the start of the IPv4 or IPv6 header, while + * *iph_len* contains **sizeof**\ (**struct iphdr**) or + * **sizeof**\ (**struct ip6hdr**). + * + * *th* points to the start of the TCP header, while *th_len* + * contains the length of the TCP header. + * + * + * Returns + * On success, lower 32 bits hold the generated SYN cookie in + * followed by 16 bits which hold the MSS value for that cookie, + * and the top 16 bits are unused. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** SYN cookie cannot be issued due to error + * + * **-ENOENT** SYN cookie should not be issued (no SYN flood) + * + * **-EOPNOTSUPP** kernel configuration does not enable SYN cookies + * + * **-EPROTONOSUPPORT** IP packet version is not 4 or 6 + */ +static __s64 (*bpf_tcp_gen_syncookie)(struct bpf_sock *sk, void *iph, __u32 iph_len, struct tcphdr *th, __u32 th_len) = (void *) 110; + +/* + * bpf_skb_output + * + * Write raw *data* blob into a special BPF perf event held by + * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf + * event must have the following attributes: **PERF_SAMPLE_RAW** + * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and + * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. + * + * The *flags* are used to indicate the index in *map* for which + * the value must be put, masked with **BPF_F_INDEX_MASK**. + * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** + * to indicate that the index of the current CPU core should be + * used. + * + * The value to write, of *size*, is passed through eBPF stack and + * pointed by *data*. + * + * *ctx* is a pointer to in-kernel struct sk_buff. + * + * This helper is similar to **bpf_perf_event_output**\ () but + * restricted to raw_tracepoint bpf programs. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_skb_output)(void *ctx, void *map, __u64 flags, void *data, __u64 size) = (void *) 111; + +/* + * bpf_probe_read_user + * + * Safely attempt to read *size* bytes from user space address + * *unsafe_ptr* and store the data in *dst*. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_probe_read_user)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 112; + +/* + * bpf_probe_read_kernel + * + * Safely attempt to read *size* bytes from kernel space address + * *unsafe_ptr* and store the data in *dst*. + * + * Returns + * 0 on success, or a negative error in case of failure. + */ +static int (*bpf_probe_read_kernel)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 113; + +/* + * bpf_probe_read_user_str + * + * Copy a NUL terminated string from an unsafe user address + * *unsafe_ptr* to *dst*. The *size* should include the + * terminating NUL byte. In case the string length is smaller than + * *size*, the target is not padded with further NUL bytes. If the + * string length is larger than *size*, just *size*-1 bytes are + * copied and the last byte is set to NUL. + * + * On success, the length of the copied string is returned. This + * makes this helper useful in tracing programs for reading + * strings, and more importantly to get its length at runtime. See + * the following snippet: + * + * :: + * + * SEC("kprobe/sys_open") + * void bpf_sys_open(struct pt_regs *ctx) + * { + * char buf[PATHLEN]; // PATHLEN is defined to 256 + * int res = bpf_probe_read_user_str(buf, sizeof(buf), + * ctx->di); + * + * // Consume buf, for example push it to + * // userspace via bpf_perf_event_output(); we + * // can use res (the string length) as event + * // size, after checking its boundaries. + * } + * + * In comparison, using **bpf_probe_read_user()** helper here + * instead to read the string would require to estimate the length + * at compile time, and would often result in copying more memory + * than necessary. + * + * Another useful use case is when parsing individual process + * arguments or individual environment variables navigating + * *current*\ **->mm->arg_start** and *current*\ + * **->mm->env_start**: using this helper and the return value, + * one can quickly iterate at the right offset of the memory area. + * + * Returns + * On success, the strictly positive length of the string, + * including the trailing NUL character. On error, a negative + * value. + */ +static int (*bpf_probe_read_user_str)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 114; + +/* + * bpf_probe_read_kernel_str + * + * Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr* + * to *dst*. Same semantics as with bpf_probe_read_user_str() apply. + * + * Returns + * On success, the strictly positive length of the string, including + * the trailing NUL character. On error, a negative value. + */ +static int (*bpf_probe_read_kernel_str)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 115; + + diff --git a/src/contrib/libbpf/bpf/bpf_helpers.h b/src/contrib/libbpf/bpf/bpf_helpers.h new file mode 100644 index 0000000..0c7d282 --- /dev/null +++ b/src/contrib/libbpf/bpf/bpf_helpers.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __BPF_HELPERS__ +#define __BPF_HELPERS__ + +#include "bpf_helper_defs.h" + +#define __uint(name, val) int (*name)[val] +#define __type(name, val) typeof(val) *name + +/* Helper macro to print out debug messages */ +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +/* + * Helper macro to place programs, maps, license in + * different sections in elf_bpf file. Section names + * are interpreted by elf_bpf loader + */ +#define SEC(NAME) __attribute__((section(NAME), used)) + +#ifndef __always_inline +#define __always_inline __attribute__((always_inline)) +#endif + +/* + * Helper structure used by eBPF C program + * to describe BPF map attributes to libbpf loader + */ +struct bpf_map_def { + unsigned int type; + unsigned int key_size; + unsigned int value_size; + unsigned int max_entries; + unsigned int map_flags; +}; + +enum libbpf_pin_type { + LIBBPF_PIN_NONE, + /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ + LIBBPF_PIN_BY_NAME, +}; + +#endif diff --git a/src/contrib/libbpf/bpf/bpf_prog_linfo.c b/src/contrib/libbpf/bpf/bpf_prog_linfo.c new file mode 100644 index 0000000..3ed1a27 --- /dev/null +++ b/src/contrib/libbpf/bpf/bpf_prog_linfo.c @@ -0,0 +1,246 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2018 Facebook */ + +#include <string.h> +#include <stdlib.h> +#include <linux/err.h> +#include <linux/bpf.h> +#include "libbpf.h" +#include "libbpf_internal.h" + +struct bpf_prog_linfo { + void *raw_linfo; + void *raw_jited_linfo; + __u32 *nr_jited_linfo_per_func; + __u32 *jited_linfo_func_idx; + __u32 nr_linfo; + __u32 nr_jited_func; + __u32 rec_size; + __u32 jited_rec_size; +}; + +static int dissect_jited_func(struct bpf_prog_linfo *prog_linfo, + const __u64 *ksym_func, const __u32 *ksym_len) +{ + __u32 nr_jited_func, nr_linfo; + const void *raw_jited_linfo; + const __u64 *jited_linfo; + __u64 last_jited_linfo; + /* + * Index to raw_jited_linfo: + * i: Index for searching the next ksym_func + * prev_i: Index to the last found ksym_func + */ + __u32 i, prev_i; + __u32 f; /* Index to ksym_func */ + + raw_jited_linfo = prog_linfo->raw_jited_linfo; + jited_linfo = raw_jited_linfo; + if (ksym_func[0] != *jited_linfo) + goto errout; + + prog_linfo->jited_linfo_func_idx[0] = 0; + nr_jited_func = prog_linfo->nr_jited_func; + nr_linfo = prog_linfo->nr_linfo; + + for (prev_i = 0, i = 1, f = 1; + i < nr_linfo && f < nr_jited_func; + i++) { + raw_jited_linfo += prog_linfo->jited_rec_size; + last_jited_linfo = *jited_linfo; + jited_linfo = raw_jited_linfo; + + if (ksym_func[f] == *jited_linfo) { + prog_linfo->jited_linfo_func_idx[f] = i; + + /* Sanity check */ + if (last_jited_linfo - ksym_func[f - 1] + 1 > + ksym_len[f - 1]) + goto errout; + + prog_linfo->nr_jited_linfo_per_func[f - 1] = + i - prev_i; + prev_i = i; + + /* + * The ksym_func[f] is found in jited_linfo. + * Look for the next one. + */ + f++; + } else if (*jited_linfo <= last_jited_linfo) { + /* Ensure the addr is increasing _within_ a func */ + goto errout; + } + } + + if (f != nr_jited_func) + goto errout; + + prog_linfo->nr_jited_linfo_per_func[nr_jited_func - 1] = + nr_linfo - prev_i; + + return 0; + +errout: + return -EINVAL; +} + +void bpf_prog_linfo__free(struct bpf_prog_linfo *prog_linfo) +{ + if (!prog_linfo) + return; + + free(prog_linfo->raw_linfo); + free(prog_linfo->raw_jited_linfo); + free(prog_linfo->nr_jited_linfo_per_func); + free(prog_linfo->jited_linfo_func_idx); + free(prog_linfo); +} + +struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info) +{ + struct bpf_prog_linfo *prog_linfo; + __u32 nr_linfo, nr_jited_func; + __u64 data_sz; + + nr_linfo = info->nr_line_info; + + if (!nr_linfo) + return NULL; + + /* + * The min size that bpf_prog_linfo has to access for + * searching purpose. + */ + if (info->line_info_rec_size < + offsetof(struct bpf_line_info, file_name_off)) + return NULL; + + prog_linfo = calloc(1, sizeof(*prog_linfo)); + if (!prog_linfo) + return NULL; + + /* Copy xlated line_info */ + prog_linfo->nr_linfo = nr_linfo; + prog_linfo->rec_size = info->line_info_rec_size; + data_sz = (__u64)nr_linfo * prog_linfo->rec_size; + prog_linfo->raw_linfo = malloc(data_sz); + if (!prog_linfo->raw_linfo) + goto err_free; + memcpy(prog_linfo->raw_linfo, (void *)(long)info->line_info, data_sz); + + nr_jited_func = info->nr_jited_ksyms; + if (!nr_jited_func || + !info->jited_line_info || + info->nr_jited_line_info != nr_linfo || + info->jited_line_info_rec_size < sizeof(__u64) || + info->nr_jited_func_lens != nr_jited_func || + !info->jited_ksyms || + !info->jited_func_lens) + /* Not enough info to provide jited_line_info */ + return prog_linfo; + + /* Copy jited_line_info */ + prog_linfo->nr_jited_func = nr_jited_func; + prog_linfo->jited_rec_size = info->jited_line_info_rec_size; + data_sz = (__u64)nr_linfo * prog_linfo->jited_rec_size; + prog_linfo->raw_jited_linfo = malloc(data_sz); + if (!prog_linfo->raw_jited_linfo) + goto err_free; + memcpy(prog_linfo->raw_jited_linfo, + (void *)(long)info->jited_line_info, data_sz); + + /* Number of jited_line_info per jited func */ + prog_linfo->nr_jited_linfo_per_func = malloc(nr_jited_func * + sizeof(__u32)); + if (!prog_linfo->nr_jited_linfo_per_func) + goto err_free; + + /* + * For each jited func, + * the start idx to the "linfo" and "jited_linfo" array, + */ + prog_linfo->jited_linfo_func_idx = malloc(nr_jited_func * + sizeof(__u32)); + if (!prog_linfo->jited_linfo_func_idx) + goto err_free; + + if (dissect_jited_func(prog_linfo, + (__u64 *)(long)info->jited_ksyms, + (__u32 *)(long)info->jited_func_lens)) + goto err_free; + + return prog_linfo; + +err_free: + bpf_prog_linfo__free(prog_linfo); + return NULL; +} + +const struct bpf_line_info * +bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo, + __u64 addr, __u32 func_idx, __u32 nr_skip) +{ + __u32 jited_rec_size, rec_size, nr_linfo, start, i; + const void *raw_jited_linfo, *raw_linfo; + const __u64 *jited_linfo; + + if (func_idx >= prog_linfo->nr_jited_func) + return NULL; + + nr_linfo = prog_linfo->nr_jited_linfo_per_func[func_idx]; + if (nr_skip >= nr_linfo) + return NULL; + + start = prog_linfo->jited_linfo_func_idx[func_idx] + nr_skip; + jited_rec_size = prog_linfo->jited_rec_size; + raw_jited_linfo = prog_linfo->raw_jited_linfo + + (start * jited_rec_size); + jited_linfo = raw_jited_linfo; + if (addr < *jited_linfo) + return NULL; + + nr_linfo -= nr_skip; + rec_size = prog_linfo->rec_size; + raw_linfo = prog_linfo->raw_linfo + (start * rec_size); + for (i = 0; i < nr_linfo; i++) { + if (addr < *jited_linfo) + break; + + raw_linfo += rec_size; + raw_jited_linfo += jited_rec_size; + jited_linfo = raw_jited_linfo; + } + + return raw_linfo - rec_size; +} + +const struct bpf_line_info * +bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo, + __u32 insn_off, __u32 nr_skip) +{ + const struct bpf_line_info *linfo; + __u32 rec_size, nr_linfo, i; + const void *raw_linfo; + + nr_linfo = prog_linfo->nr_linfo; + if (nr_skip >= nr_linfo) + return NULL; + + rec_size = prog_linfo->rec_size; + raw_linfo = prog_linfo->raw_linfo + (nr_skip * rec_size); + linfo = raw_linfo; + if (insn_off < linfo->insn_off) + return NULL; + + nr_linfo -= nr_skip; + for (i = 0; i < nr_linfo; i++) { + if (insn_off < linfo->insn_off) + break; + + raw_linfo += rec_size; + linfo = raw_linfo; + } + + return raw_linfo - rec_size; +} diff --git a/src/contrib/libbpf/bpf/bpf_tracing.h b/src/contrib/libbpf/bpf/bpf_tracing.h new file mode 100644 index 0000000..b0dafe8 --- /dev/null +++ b/src/contrib/libbpf/bpf/bpf_tracing.h @@ -0,0 +1,195 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __BPF_TRACING_H__ +#define __BPF_TRACING_H__ + +/* Scan the ARCH passed in from ARCH env variable (see Makefile) */ +#if defined(__TARGET_ARCH_x86) + #define bpf_target_x86 + #define bpf_target_defined +#elif defined(__TARGET_ARCH_s390) + #define bpf_target_s390 + #define bpf_target_defined +#elif defined(__TARGET_ARCH_arm) + #define bpf_target_arm + #define bpf_target_defined +#elif defined(__TARGET_ARCH_arm64) + #define bpf_target_arm64 + #define bpf_target_defined +#elif defined(__TARGET_ARCH_mips) + #define bpf_target_mips + #define bpf_target_defined +#elif defined(__TARGET_ARCH_powerpc) + #define bpf_target_powerpc + #define bpf_target_defined +#elif defined(__TARGET_ARCH_sparc) + #define bpf_target_sparc + #define bpf_target_defined +#else + #undef bpf_target_defined +#endif + +/* Fall back to what the compiler says */ +#ifndef bpf_target_defined +#if defined(__x86_64__) + #define bpf_target_x86 +#elif defined(__s390__) + #define bpf_target_s390 +#elif defined(__arm__) + #define bpf_target_arm +#elif defined(__aarch64__) + #define bpf_target_arm64 +#elif defined(__mips__) + #define bpf_target_mips +#elif defined(__powerpc__) + #define bpf_target_powerpc +#elif defined(__sparc__) + #define bpf_target_sparc +#endif +#endif + +#if defined(bpf_target_x86) + +#ifdef __KERNEL__ +#define PT_REGS_PARM1(x) ((x)->di) +#define PT_REGS_PARM2(x) ((x)->si) +#define PT_REGS_PARM3(x) ((x)->dx) +#define PT_REGS_PARM4(x) ((x)->cx) +#define PT_REGS_PARM5(x) ((x)->r8) +#define PT_REGS_RET(x) ((x)->sp) +#define PT_REGS_FP(x) ((x)->bp) +#define PT_REGS_RC(x) ((x)->ax) +#define PT_REGS_SP(x) ((x)->sp) +#define PT_REGS_IP(x) ((x)->ip) +#else +#ifdef __i386__ +/* i386 kernel is built with -mregparm=3 */ +#define PT_REGS_PARM1(x) ((x)->eax) +#define PT_REGS_PARM2(x) ((x)->edx) +#define PT_REGS_PARM3(x) ((x)->ecx) +#define PT_REGS_PARM4(x) 0 +#define PT_REGS_PARM5(x) 0 +#define PT_REGS_RET(x) ((x)->esp) +#define PT_REGS_FP(x) ((x)->ebp) +#define PT_REGS_RC(x) ((x)->eax) +#define PT_REGS_SP(x) ((x)->esp) +#define PT_REGS_IP(x) ((x)->eip) +#else +#define PT_REGS_PARM1(x) ((x)->rdi) +#define PT_REGS_PARM2(x) ((x)->rsi) +#define PT_REGS_PARM3(x) ((x)->rdx) +#define PT_REGS_PARM4(x) ((x)->rcx) +#define PT_REGS_PARM5(x) ((x)->r8) +#define PT_REGS_RET(x) ((x)->rsp) +#define PT_REGS_FP(x) ((x)->rbp) +#define PT_REGS_RC(x) ((x)->rax) +#define PT_REGS_SP(x) ((x)->rsp) +#define PT_REGS_IP(x) ((x)->rip) +#endif +#endif + +#elif defined(bpf_target_s390) + +/* s390 provides user_pt_regs instead of struct pt_regs to userspace */ +struct pt_regs; +#define PT_REGS_S390 const volatile user_pt_regs +#define PT_REGS_PARM1(x) (((PT_REGS_S390 *)(x))->gprs[2]) +#define PT_REGS_PARM2(x) (((PT_REGS_S390 *)(x))->gprs[3]) +#define PT_REGS_PARM3(x) (((PT_REGS_S390 *)(x))->gprs[4]) +#define PT_REGS_PARM4(x) (((PT_REGS_S390 *)(x))->gprs[5]) +#define PT_REGS_PARM5(x) (((PT_REGS_S390 *)(x))->gprs[6]) +#define PT_REGS_RET(x) (((PT_REGS_S390 *)(x))->gprs[14]) +/* Works only with CONFIG_FRAME_POINTER */ +#define PT_REGS_FP(x) (((PT_REGS_S390 *)(x))->gprs[11]) +#define PT_REGS_RC(x) (((PT_REGS_S390 *)(x))->gprs[2]) +#define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15]) +#define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr) + +#elif defined(bpf_target_arm) + +#define PT_REGS_PARM1(x) ((x)->uregs[0]) +#define PT_REGS_PARM2(x) ((x)->uregs[1]) +#define PT_REGS_PARM3(x) ((x)->uregs[2]) +#define PT_REGS_PARM4(x) ((x)->uregs[3]) +#define PT_REGS_PARM5(x) ((x)->uregs[4]) +#define PT_REGS_RET(x) ((x)->uregs[14]) +#define PT_REGS_FP(x) ((x)->uregs[11]) /* Works only with CONFIG_FRAME_POINTER */ +#define PT_REGS_RC(x) ((x)->uregs[0]) +#define PT_REGS_SP(x) ((x)->uregs[13]) +#define PT_REGS_IP(x) ((x)->uregs[12]) + +#elif defined(bpf_target_arm64) + +/* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */ +struct pt_regs; +#define PT_REGS_ARM64 const volatile struct user_pt_regs +#define PT_REGS_PARM1(x) (((PT_REGS_ARM64 *)(x))->regs[0]) +#define PT_REGS_PARM2(x) (((PT_REGS_ARM64 *)(x))->regs[1]) +#define PT_REGS_PARM3(x) (((PT_REGS_ARM64 *)(x))->regs[2]) +#define PT_REGS_PARM4(x) (((PT_REGS_ARM64 *)(x))->regs[3]) +#define PT_REGS_PARM5(x) (((PT_REGS_ARM64 *)(x))->regs[4]) +#define PT_REGS_RET(x) (((PT_REGS_ARM64 *)(x))->regs[30]) +/* Works only with CONFIG_FRAME_POINTER */ +#define PT_REGS_FP(x) (((PT_REGS_ARM64 *)(x))->regs[29]) +#define PT_REGS_RC(x) (((PT_REGS_ARM64 *)(x))->regs[0]) +#define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp) +#define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc) + +#elif defined(bpf_target_mips) + +#define PT_REGS_PARM1(x) ((x)->regs[4]) +#define PT_REGS_PARM2(x) ((x)->regs[5]) +#define PT_REGS_PARM3(x) ((x)->regs[6]) +#define PT_REGS_PARM4(x) ((x)->regs[7]) +#define PT_REGS_PARM5(x) ((x)->regs[8]) +#define PT_REGS_RET(x) ((x)->regs[31]) +#define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */ +#define PT_REGS_RC(x) ((x)->regs[1]) +#define PT_REGS_SP(x) ((x)->regs[29]) +#define PT_REGS_IP(x) ((x)->cp0_epc) + +#elif defined(bpf_target_powerpc) + +#define PT_REGS_PARM1(x) ((x)->gpr[3]) +#define PT_REGS_PARM2(x) ((x)->gpr[4]) +#define PT_REGS_PARM3(x) ((x)->gpr[5]) +#define PT_REGS_PARM4(x) ((x)->gpr[6]) +#define PT_REGS_PARM5(x) ((x)->gpr[7]) +#define PT_REGS_RC(x) ((x)->gpr[3]) +#define PT_REGS_SP(x) ((x)->sp) +#define PT_REGS_IP(x) ((x)->nip) + +#elif defined(bpf_target_sparc) + +#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0]) +#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1]) +#define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2]) +#define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3]) +#define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4]) +#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7]) +#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0]) +#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP]) + +/* Should this also be a bpf_target check for the sparc case? */ +#if defined(__arch64__) +#define PT_REGS_IP(x) ((x)->tpc) +#else +#define PT_REGS_IP(x) ((x)->pc) +#endif + +#endif + +#if defined(bpf_target_powerpc) +#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; }) +#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP +#elif defined(bpf_target_sparc) +#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); }) +#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP +#else +#define BPF_KPROBE_READ_RET_IP(ip, ctx) \ + ({ bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); }) +#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) \ + ({ bpf_probe_read(&(ip), sizeof(ip), \ + (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) +#endif + +#endif diff --git a/src/contrib/libbpf/bpf/btf.c b/src/contrib/libbpf/bpf/btf.c new file mode 100644 index 0000000..88efa2b --- /dev/null +++ b/src/contrib/libbpf/bpf/btf.c @@ -0,0 +1,2884 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2018 Facebook */ + +#include <endian.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <unistd.h> +#include <errno.h> +#include <linux/err.h> +#include <linux/btf.h> +#include <gelf.h> +#include "btf.h" +#include "bpf.h" +#include "libbpf.h" +#include "libbpf_internal.h" +#include "hashmap.h" + +#define BTF_MAX_NR_TYPES 0x7fffffff +#define BTF_MAX_STR_OFFSET 0x7fffffff + +static struct btf_type btf_void; + +struct btf { + union { + struct btf_header *hdr; + void *data; + }; + struct btf_type **types; + const char *strings; + void *nohdr_data; + __u32 nr_types; + __u32 types_size; + __u32 data_size; + int fd; +}; + +static inline __u64 ptr_to_u64(const void *ptr) +{ + return (__u64) (unsigned long) ptr; +} + +static int btf_add_type(struct btf *btf, struct btf_type *t) +{ + if (btf->types_size - btf->nr_types < 2) { + struct btf_type **new_types; + __u32 expand_by, new_size; + + if (btf->types_size == BTF_MAX_NR_TYPES) + return -E2BIG; + + expand_by = max(btf->types_size >> 2, 16); + new_size = min(BTF_MAX_NR_TYPES, btf->types_size + expand_by); + + new_types = realloc(btf->types, sizeof(*new_types) * new_size); + if (!new_types) + return -ENOMEM; + + if (btf->nr_types == 0) + new_types[0] = &btf_void; + + btf->types = new_types; + btf->types_size = new_size; + } + + btf->types[++(btf->nr_types)] = t; + + return 0; +} + +static int btf_parse_hdr(struct btf *btf) +{ + const struct btf_header *hdr = btf->hdr; + __u32 meta_left; + + if (btf->data_size < sizeof(struct btf_header)) { + pr_debug("BTF header not found\n"); + return -EINVAL; + } + + if (hdr->magic != BTF_MAGIC) { + pr_debug("Invalid BTF magic:%x\n", hdr->magic); + return -EINVAL; + } + + if (hdr->version != BTF_VERSION) { + pr_debug("Unsupported BTF version:%u\n", hdr->version); + return -ENOTSUP; + } + + if (hdr->flags) { + pr_debug("Unsupported BTF flags:%x\n", hdr->flags); + return -ENOTSUP; + } + + meta_left = btf->data_size - sizeof(*hdr); + if (!meta_left) { + pr_debug("BTF has no data\n"); + return -EINVAL; + } + + if (meta_left < hdr->type_off) { + pr_debug("Invalid BTF type section offset:%u\n", hdr->type_off); + return -EINVAL; + } + + if (meta_left < hdr->str_off) { + pr_debug("Invalid BTF string section offset:%u\n", hdr->str_off); + return -EINVAL; + } + + if (hdr->type_off >= hdr->str_off) { + pr_debug("BTF type section offset >= string section offset. No type?\n"); + return -EINVAL; + } + + if (hdr->type_off & 0x02) { + pr_debug("BTF type section is not aligned to 4 bytes\n"); + return -EINVAL; + } + + btf->nohdr_data = btf->hdr + 1; + + return 0; +} + +static int btf_parse_str_sec(struct btf *btf) +{ + const struct btf_header *hdr = btf->hdr; + const char *start = btf->nohdr_data + hdr->str_off; + const char *end = start + btf->hdr->str_len; + + if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET || + start[0] || end[-1]) { + pr_debug("Invalid BTF string section\n"); + return -EINVAL; + } + + btf->strings = start; + + return 0; +} + +static int btf_type_size(struct btf_type *t) +{ + int base_size = sizeof(struct btf_type); + __u16 vlen = btf_vlen(t); + + switch (btf_kind(t)) { + case BTF_KIND_FWD: + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + return base_size; + case BTF_KIND_INT: + return base_size + sizeof(__u32); + case BTF_KIND_ENUM: + return base_size + vlen * sizeof(struct btf_enum); + case BTF_KIND_ARRAY: + return base_size + sizeof(struct btf_array); + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + return base_size + vlen * sizeof(struct btf_member); + case BTF_KIND_FUNC_PROTO: + return base_size + vlen * sizeof(struct btf_param); + case BTF_KIND_VAR: + return base_size + sizeof(struct btf_var); + case BTF_KIND_DATASEC: + return base_size + vlen * sizeof(struct btf_var_secinfo); + default: + pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t)); + return -EINVAL; + } +} + +static int btf_parse_type_sec(struct btf *btf) +{ + struct btf_header *hdr = btf->hdr; + void *nohdr_data = btf->nohdr_data; + void *next_type = nohdr_data + hdr->type_off; + void *end_type = nohdr_data + hdr->str_off; + + while (next_type < end_type) { + struct btf_type *t = next_type; + int type_size; + int err; + + type_size = btf_type_size(t); + if (type_size < 0) + return type_size; + next_type += type_size; + err = btf_add_type(btf, t); + if (err) + return err; + } + + return 0; +} + +__u32 btf__get_nr_types(const struct btf *btf) +{ + return btf->nr_types; +} + +const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id) +{ + if (type_id > btf->nr_types) + return NULL; + + return btf->types[type_id]; +} + +static bool btf_type_is_void(const struct btf_type *t) +{ + return t == &btf_void || btf_is_fwd(t); +} + +static bool btf_type_is_void_or_null(const struct btf_type *t) +{ + return !t || btf_type_is_void(t); +} + +#define MAX_RESOLVE_DEPTH 32 + +__s64 btf__resolve_size(const struct btf *btf, __u32 type_id) +{ + const struct btf_array *array; + const struct btf_type *t; + __u32 nelems = 1; + __s64 size = -1; + int i; + + t = btf__type_by_id(btf, type_id); + for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t); + i++) { + switch (btf_kind(t)) { + case BTF_KIND_INT: + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_ENUM: + case BTF_KIND_DATASEC: + size = t->size; + goto done; + case BTF_KIND_PTR: + size = sizeof(void *); + goto done; + case BTF_KIND_TYPEDEF: + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + case BTF_KIND_VAR: + type_id = t->type; + break; + case BTF_KIND_ARRAY: + array = btf_array(t); + if (nelems && array->nelems > UINT32_MAX / nelems) + return -E2BIG; + nelems *= array->nelems; + type_id = array->type; + break; + default: + return -EINVAL; + } + + t = btf__type_by_id(btf, type_id); + } + +done: + if (size < 0) + return -EINVAL; + if (nelems && size > UINT32_MAX / nelems) + return -E2BIG; + + return nelems * size; +} + +int btf__resolve_type(const struct btf *btf, __u32 type_id) +{ + const struct btf_type *t; + int depth = 0; + + t = btf__type_by_id(btf, type_id); + while (depth < MAX_RESOLVE_DEPTH && + !btf_type_is_void_or_null(t) && + (btf_is_mod(t) || btf_is_typedef(t) || btf_is_var(t))) { + type_id = t->type; + t = btf__type_by_id(btf, type_id); + depth++; + } + + if (depth == MAX_RESOLVE_DEPTH || btf_type_is_void_or_null(t)) + return -EINVAL; + + return type_id; +} + +__s32 btf__find_by_name(const struct btf *btf, const char *type_name) +{ + __u32 i; + + if (!strcmp(type_name, "void")) + return 0; + + for (i = 1; i <= btf->nr_types; i++) { + const struct btf_type *t = btf->types[i]; + const char *name = btf__name_by_offset(btf, t->name_off); + + if (name && !strcmp(type_name, name)) + return i; + } + + return -ENOENT; +} + +__s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, + __u32 kind) +{ + __u32 i; + + if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void")) + return 0; + + for (i = 1; i <= btf->nr_types; i++) { + const struct btf_type *t = btf->types[i]; + const char *name; + + if (btf_kind(t) != kind) + continue; + name = btf__name_by_offset(btf, t->name_off); + if (name && !strcmp(type_name, name)) + return i; + } + + return -ENOENT; +} + +void btf__free(struct btf *btf) +{ + if (!btf) + return; + + if (btf->fd != -1) + close(btf->fd); + + free(btf->data); + free(btf->types); + free(btf); +} + +struct btf *btf__new(__u8 *data, __u32 size) +{ + struct btf *btf; + int err; + + btf = calloc(1, sizeof(struct btf)); + if (!btf) + return ERR_PTR(-ENOMEM); + + btf->fd = -1; + + btf->data = malloc(size); + if (!btf->data) { + err = -ENOMEM; + goto done; + } + + memcpy(btf->data, data, size); + btf->data_size = size; + + err = btf_parse_hdr(btf); + if (err) + goto done; + + err = btf_parse_str_sec(btf); + if (err) + goto done; + + err = btf_parse_type_sec(btf); + +done: + if (err) { + btf__free(btf); + return ERR_PTR(err); + } + + return btf; +} + +static bool btf_check_endianness(const GElf_Ehdr *ehdr) +{ +#if __BYTE_ORDER == __LITTLE_ENDIAN + return ehdr->e_ident[EI_DATA] == ELFDATA2LSB; +#elif __BYTE_ORDER == __BIG_ENDIAN + return ehdr->e_ident[EI_DATA] == ELFDATA2MSB; +#else +# error "Unrecognized __BYTE_ORDER__" +#endif +} + +struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext) +{ + Elf_Data *btf_data = NULL, *btf_ext_data = NULL; + int err = 0, fd = -1, idx = 0; + struct btf *btf = NULL; + Elf_Scn *scn = NULL; + Elf *elf = NULL; + GElf_Ehdr ehdr; + + if (elf_version(EV_CURRENT) == EV_NONE) { + pr_warn("failed to init libelf for %s\n", path); + return ERR_PTR(-LIBBPF_ERRNO__LIBELF); + } + + fd = open(path, O_RDONLY); + if (fd < 0) { + err = -errno; + pr_warn("failed to open %s: %s\n", path, strerror(errno)); + return ERR_PTR(err); + } + + err = -LIBBPF_ERRNO__FORMAT; + + elf = elf_begin(fd, ELF_C_READ, NULL); + if (!elf) { + pr_warn("failed to open %s as ELF file\n", path); + goto done; + } + if (!gelf_getehdr(elf, &ehdr)) { + pr_warn("failed to get EHDR from %s\n", path); + goto done; + } + if (!btf_check_endianness(&ehdr)) { + pr_warn("non-native ELF endianness is not supported\n"); + goto done; + } + if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) { + pr_warn("failed to get e_shstrndx from %s\n", path); + goto done; + } + + while ((scn = elf_nextscn(elf, scn)) != NULL) { + GElf_Shdr sh; + char *name; + + idx++; + if (gelf_getshdr(scn, &sh) != &sh) { + pr_warn("failed to get section(%d) header from %s\n", + idx, path); + goto done; + } + name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name); + if (!name) { + pr_warn("failed to get section(%d) name from %s\n", + idx, path); + goto done; + } + if (strcmp(name, BTF_ELF_SEC) == 0) { + btf_data = elf_getdata(scn, 0); + if (!btf_data) { + pr_warn("failed to get section(%d, %s) data from %s\n", + idx, name, path); + goto done; + } + continue; + } else if (btf_ext && strcmp(name, BTF_EXT_ELF_SEC) == 0) { + btf_ext_data = elf_getdata(scn, 0); + if (!btf_ext_data) { + pr_warn("failed to get section(%d, %s) data from %s\n", + idx, name, path); + goto done; + } + continue; + } + } + + err = 0; + + if (!btf_data) { + err = -ENOENT; + goto done; + } + btf = btf__new(btf_data->d_buf, btf_data->d_size); + if (IS_ERR(btf)) + goto done; + + if (btf_ext && btf_ext_data) { + *btf_ext = btf_ext__new(btf_ext_data->d_buf, + btf_ext_data->d_size); + if (IS_ERR(*btf_ext)) + goto done; + } else if (btf_ext) { + *btf_ext = NULL; + } +done: + if (elf) + elf_end(elf); + close(fd); + + if (err) + return ERR_PTR(err); + /* + * btf is always parsed before btf_ext, so no need to clean up + * btf_ext, if btf loading failed + */ + if (IS_ERR(btf)) + return btf; + if (btf_ext && IS_ERR(*btf_ext)) { + btf__free(btf); + err = PTR_ERR(*btf_ext); + return ERR_PTR(err); + } + return btf; +} + +static int compare_vsi_off(const void *_a, const void *_b) +{ + const struct btf_var_secinfo *a = _a; + const struct btf_var_secinfo *b = _b; + + return a->offset - b->offset; +} + +static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, + struct btf_type *t) +{ + __u32 size = 0, off = 0, i, vars = btf_vlen(t); + const char *name = btf__name_by_offset(btf, t->name_off); + const struct btf_type *t_var; + struct btf_var_secinfo *vsi; + const struct btf_var *var; + int ret; + + if (!name) { + pr_debug("No name found in string section for DATASEC kind.\n"); + return -ENOENT; + } + + ret = bpf_object__section_size(obj, name, &size); + if (ret || !size || (t->size && t->size != size)) { + pr_debug("Invalid size for section %s: %u bytes\n", name, size); + return -ENOENT; + } + + t->size = size; + + for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) { + t_var = btf__type_by_id(btf, vsi->type); + var = btf_var(t_var); + + if (!btf_is_var(t_var)) { + pr_debug("Non-VAR type seen in section %s\n", name); + return -EINVAL; + } + + if (var->linkage == BTF_VAR_STATIC) + continue; + + name = btf__name_by_offset(btf, t_var->name_off); + if (!name) { + pr_debug("No name found in string section for VAR kind\n"); + return -ENOENT; + } + + ret = bpf_object__variable_offset(obj, name, &off); + if (ret) { + pr_debug("No offset found in symbol table for VAR %s\n", + name); + return -ENOENT; + } + + vsi->offset = off; + } + + qsort(t + 1, vars, sizeof(*vsi), compare_vsi_off); + return 0; +} + +int btf__finalize_data(struct bpf_object *obj, struct btf *btf) +{ + int err = 0; + __u32 i; + + for (i = 1; i <= btf->nr_types; i++) { + struct btf_type *t = btf->types[i]; + + /* Loader needs to fix up some of the things compiler + * couldn't get its hands on while emitting BTF. This + * is section size and global variable offset. We use + * the info from the ELF itself for this purpose. + */ + if (btf_is_datasec(t)) { + err = btf_fixup_datasec(obj, btf, t); + if (err) + break; + } + } + + return err; +} + +int btf__load(struct btf *btf) +{ + __u32 log_buf_size = BPF_LOG_BUF_SIZE; + char *log_buf = NULL; + int err = 0; + + if (btf->fd >= 0) + return -EEXIST; + + log_buf = malloc(log_buf_size); + if (!log_buf) + return -ENOMEM; + + *log_buf = 0; + + btf->fd = bpf_load_btf(btf->data, btf->data_size, + log_buf, log_buf_size, false); + if (btf->fd < 0) { + err = -errno; + pr_warn("Error loading BTF: %s(%d)\n", strerror(errno), errno); + if (*log_buf) + pr_warn("%s\n", log_buf); + goto done; + } + +done: + free(log_buf); + return err; +} + +int btf__fd(const struct btf *btf) +{ + return btf->fd; +} + +const void *btf__get_raw_data(const struct btf *btf, __u32 *size) +{ + *size = btf->data_size; + return btf->data; +} + +const char *btf__name_by_offset(const struct btf *btf, __u32 offset) +{ + if (offset < btf->hdr->str_len) + return &btf->strings[offset]; + else + return NULL; +} + +int btf__get_from_id(__u32 id, struct btf **btf) +{ + struct bpf_btf_info btf_info = { 0 }; + __u32 len = sizeof(btf_info); + __u32 last_size; + int btf_fd; + void *ptr; + int err; + + err = 0; + *btf = NULL; + btf_fd = bpf_btf_get_fd_by_id(id); + if (btf_fd < 0) + return 0; + + /* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so + * let's start with a sane default - 4KiB here - and resize it only if + * bpf_obj_get_info_by_fd() needs a bigger buffer. + */ + btf_info.btf_size = 4096; + last_size = btf_info.btf_size; + ptr = malloc(last_size); + if (!ptr) { + err = -ENOMEM; + goto exit_free; + } + + memset(ptr, 0, last_size); + btf_info.btf = ptr_to_u64(ptr); + err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); + + if (!err && btf_info.btf_size > last_size) { + void *temp_ptr; + + last_size = btf_info.btf_size; + temp_ptr = realloc(ptr, last_size); + if (!temp_ptr) { + err = -ENOMEM; + goto exit_free; + } + ptr = temp_ptr; + memset(ptr, 0, last_size); + btf_info.btf = ptr_to_u64(ptr); + err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); + } + + if (err || btf_info.btf_size > last_size) { + err = errno; + goto exit_free; + } + + *btf = btf__new((__u8 *)(long)btf_info.btf, btf_info.btf_size); + if (IS_ERR(*btf)) { + err = PTR_ERR(*btf); + *btf = NULL; + } + +exit_free: + close(btf_fd); + free(ptr); + + return err; +} + +int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, + __u32 expected_key_size, __u32 expected_value_size, + __u32 *key_type_id, __u32 *value_type_id) +{ + const struct btf_type *container_type; + const struct btf_member *key, *value; + const size_t max_name = 256; + char container_name[max_name]; + __s64 key_size, value_size; + __s32 container_id; + + if (snprintf(container_name, max_name, "____btf_map_%s", map_name) == + max_name) { + pr_warn("map:%s length of '____btf_map_%s' is too long\n", + map_name, map_name); + return -EINVAL; + } + + container_id = btf__find_by_name(btf, container_name); + if (container_id < 0) { + pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n", + map_name, container_name); + return container_id; + } + + container_type = btf__type_by_id(btf, container_id); + if (!container_type) { + pr_warn("map:%s cannot find BTF type for container_id:%u\n", + map_name, container_id); + return -EINVAL; + } + + if (!btf_is_struct(container_type) || btf_vlen(container_type) < 2) { + pr_warn("map:%s container_name:%s is an invalid container struct\n", + map_name, container_name); + return -EINVAL; + } + + key = btf_members(container_type); + value = key + 1; + + key_size = btf__resolve_size(btf, key->type); + if (key_size < 0) { + pr_warn("map:%s invalid BTF key_type_size\n", map_name); + return key_size; + } + + if (expected_key_size != key_size) { + pr_warn("map:%s btf_key_type_size:%u != map_def_key_size:%u\n", + map_name, (__u32)key_size, expected_key_size); + return -EINVAL; + } + + value_size = btf__resolve_size(btf, value->type); + if (value_size < 0) { + pr_warn("map:%s invalid BTF value_type_size\n", map_name); + return value_size; + } + + if (expected_value_size != value_size) { + pr_warn("map:%s btf_value_type_size:%u != map_def_value_size:%u\n", + map_name, (__u32)value_size, expected_value_size); + return -EINVAL; + } + + *key_type_id = key->type; + *value_type_id = value->type; + + return 0; +} + +struct btf_ext_sec_setup_param { + __u32 off; + __u32 len; + __u32 min_rec_size; + struct btf_ext_info *ext_info; + const char *desc; +}; + +static int btf_ext_setup_info(struct btf_ext *btf_ext, + struct btf_ext_sec_setup_param *ext_sec) +{ + const struct btf_ext_info_sec *sinfo; + struct btf_ext_info *ext_info; + __u32 info_left, record_size; + /* The start of the info sec (including the __u32 record_size). */ + void *info; + + if (ext_sec->len == 0) + return 0; + + if (ext_sec->off & 0x03) { + pr_debug(".BTF.ext %s section is not aligned to 4 bytes\n", + ext_sec->desc); + return -EINVAL; + } + + info = btf_ext->data + btf_ext->hdr->hdr_len + ext_sec->off; + info_left = ext_sec->len; + + if (btf_ext->data + btf_ext->data_size < info + ext_sec->len) { + pr_debug("%s section (off:%u len:%u) is beyond the end of the ELF section .BTF.ext\n", + ext_sec->desc, ext_sec->off, ext_sec->len); + return -EINVAL; + } + + /* At least a record size */ + if (info_left < sizeof(__u32)) { + pr_debug(".BTF.ext %s record size not found\n", ext_sec->desc); + return -EINVAL; + } + + /* The record size needs to meet the minimum standard */ + record_size = *(__u32 *)info; + if (record_size < ext_sec->min_rec_size || + record_size & 0x03) { + pr_debug("%s section in .BTF.ext has invalid record size %u\n", + ext_sec->desc, record_size); + return -EINVAL; + } + + sinfo = info + sizeof(__u32); + info_left -= sizeof(__u32); + + /* If no records, return failure now so .BTF.ext won't be used. */ + if (!info_left) { + pr_debug("%s section in .BTF.ext has no records", ext_sec->desc); + return -EINVAL; + } + + while (info_left) { + unsigned int sec_hdrlen = sizeof(struct btf_ext_info_sec); + __u64 total_record_size; + __u32 num_records; + + if (info_left < sec_hdrlen) { + pr_debug("%s section header is not found in .BTF.ext\n", + ext_sec->desc); + return -EINVAL; + } + + num_records = sinfo->num_info; + if (num_records == 0) { + pr_debug("%s section has incorrect num_records in .BTF.ext\n", + ext_sec->desc); + return -EINVAL; + } + + total_record_size = sec_hdrlen + + (__u64)num_records * record_size; + if (info_left < total_record_size) { + pr_debug("%s section has incorrect num_records in .BTF.ext\n", + ext_sec->desc); + return -EINVAL; + } + + info_left -= total_record_size; + sinfo = (void *)sinfo + total_record_size; + } + + ext_info = ext_sec->ext_info; + ext_info->len = ext_sec->len - sizeof(__u32); + ext_info->rec_size = record_size; + ext_info->info = info + sizeof(__u32); + + return 0; +} + +static int btf_ext_setup_func_info(struct btf_ext *btf_ext) +{ + struct btf_ext_sec_setup_param param = { + .off = btf_ext->hdr->func_info_off, + .len = btf_ext->hdr->func_info_len, + .min_rec_size = sizeof(struct bpf_func_info_min), + .ext_info = &btf_ext->func_info, + .desc = "func_info" + }; + + return btf_ext_setup_info(btf_ext, ¶m); +} + +static int btf_ext_setup_line_info(struct btf_ext *btf_ext) +{ + struct btf_ext_sec_setup_param param = { + .off = btf_ext->hdr->line_info_off, + .len = btf_ext->hdr->line_info_len, + .min_rec_size = sizeof(struct bpf_line_info_min), + .ext_info = &btf_ext->line_info, + .desc = "line_info", + }; + + return btf_ext_setup_info(btf_ext, ¶m); +} + +static int btf_ext_setup_field_reloc(struct btf_ext *btf_ext) +{ + struct btf_ext_sec_setup_param param = { + .off = btf_ext->hdr->field_reloc_off, + .len = btf_ext->hdr->field_reloc_len, + .min_rec_size = sizeof(struct bpf_field_reloc), + .ext_info = &btf_ext->field_reloc_info, + .desc = "field_reloc", + }; + + return btf_ext_setup_info(btf_ext, ¶m); +} + +static int btf_ext_parse_hdr(__u8 *data, __u32 data_size) +{ + const struct btf_ext_header *hdr = (struct btf_ext_header *)data; + + if (data_size < offsetofend(struct btf_ext_header, hdr_len) || + data_size < hdr->hdr_len) { + pr_debug("BTF.ext header not found"); + return -EINVAL; + } + + if (hdr->magic != BTF_MAGIC) { + pr_debug("Invalid BTF.ext magic:%x\n", hdr->magic); + return -EINVAL; + } + + if (hdr->version != BTF_VERSION) { + pr_debug("Unsupported BTF.ext version:%u\n", hdr->version); + return -ENOTSUP; + } + + if (hdr->flags) { + pr_debug("Unsupported BTF.ext flags:%x\n", hdr->flags); + return -ENOTSUP; + } + + if (data_size == hdr->hdr_len) { + pr_debug("BTF.ext has no data\n"); + return -EINVAL; + } + + return 0; +} + +void btf_ext__free(struct btf_ext *btf_ext) +{ + if (!btf_ext) + return; + free(btf_ext->data); + free(btf_ext); +} + +struct btf_ext *btf_ext__new(__u8 *data, __u32 size) +{ + struct btf_ext *btf_ext; + int err; + + err = btf_ext_parse_hdr(data, size); + if (err) + return ERR_PTR(err); + + btf_ext = calloc(1, sizeof(struct btf_ext)); + if (!btf_ext) + return ERR_PTR(-ENOMEM); + + btf_ext->data_size = size; + btf_ext->data = malloc(size); + if (!btf_ext->data) { + err = -ENOMEM; + goto done; + } + memcpy(btf_ext->data, data, size); + + if (btf_ext->hdr->hdr_len < + offsetofend(struct btf_ext_header, line_info_len)) + goto done; + err = btf_ext_setup_func_info(btf_ext); + if (err) + goto done; + + err = btf_ext_setup_line_info(btf_ext); + if (err) + goto done; + + if (btf_ext->hdr->hdr_len < + offsetofend(struct btf_ext_header, field_reloc_len)) + goto done; + err = btf_ext_setup_field_reloc(btf_ext); + if (err) + goto done; + +done: + if (err) { + btf_ext__free(btf_ext); + return ERR_PTR(err); + } + + return btf_ext; +} + +const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size) +{ + *size = btf_ext->data_size; + return btf_ext->data; +} + +static int btf_ext_reloc_info(const struct btf *btf, + const struct btf_ext_info *ext_info, + const char *sec_name, __u32 insns_cnt, + void **info, __u32 *cnt) +{ + __u32 sec_hdrlen = sizeof(struct btf_ext_info_sec); + __u32 i, record_size, existing_len, records_len; + struct btf_ext_info_sec *sinfo; + const char *info_sec_name; + __u64 remain_len; + void *data; + + record_size = ext_info->rec_size; + sinfo = ext_info->info; + remain_len = ext_info->len; + while (remain_len > 0) { + records_len = sinfo->num_info * record_size; + info_sec_name = btf__name_by_offset(btf, sinfo->sec_name_off); + if (strcmp(info_sec_name, sec_name)) { + remain_len -= sec_hdrlen + records_len; + sinfo = (void *)sinfo + sec_hdrlen + records_len; + continue; + } + + existing_len = (*cnt) * record_size; + data = realloc(*info, existing_len + records_len); + if (!data) + return -ENOMEM; + + memcpy(data + existing_len, sinfo->data, records_len); + /* adjust insn_off only, the rest data will be passed + * to the kernel. + */ + for (i = 0; i < sinfo->num_info; i++) { + __u32 *insn_off; + + insn_off = data + existing_len + (i * record_size); + *insn_off = *insn_off / sizeof(struct bpf_insn) + + insns_cnt; + } + *info = data; + *cnt += sinfo->num_info; + return 0; + } + + return -ENOENT; +} + +int btf_ext__reloc_func_info(const struct btf *btf, + const struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **func_info, __u32 *cnt) +{ + return btf_ext_reloc_info(btf, &btf_ext->func_info, sec_name, + insns_cnt, func_info, cnt); +} + +int btf_ext__reloc_line_info(const struct btf *btf, + const struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **line_info, __u32 *cnt) +{ + return btf_ext_reloc_info(btf, &btf_ext->line_info, sec_name, + insns_cnt, line_info, cnt); +} + +__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext) +{ + return btf_ext->func_info.rec_size; +} + +__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext) +{ + return btf_ext->line_info.rec_size; +} + +struct btf_dedup; + +static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, + const struct btf_dedup_opts *opts); +static void btf_dedup_free(struct btf_dedup *d); +static int btf_dedup_strings(struct btf_dedup *d); +static int btf_dedup_prim_types(struct btf_dedup *d); +static int btf_dedup_struct_types(struct btf_dedup *d); +static int btf_dedup_ref_types(struct btf_dedup *d); +static int btf_dedup_compact_types(struct btf_dedup *d); +static int btf_dedup_remap_types(struct btf_dedup *d); + +/* + * Deduplicate BTF types and strings. + * + * BTF dedup algorithm takes as an input `struct btf` representing `.BTF` ELF + * section with all BTF type descriptors and string data. It overwrites that + * memory in-place with deduplicated types and strings without any loss of + * information. If optional `struct btf_ext` representing '.BTF.ext' ELF section + * is provided, all the strings referenced from .BTF.ext section are honored + * and updated to point to the right offsets after deduplication. + * + * If function returns with error, type/string data might be garbled and should + * be discarded. + * + * More verbose and detailed description of both problem btf_dedup is solving, + * as well as solution could be found at: + * https://facebookmicrosites.github.io/bpf/blog/2018/11/14/btf-enhancement.html + * + * Problem description and justification + * ===================================== + * + * BTF type information is typically emitted either as a result of conversion + * from DWARF to BTF or directly by compiler. In both cases, each compilation + * unit contains information about a subset of all the types that are used + * in an application. These subsets are frequently overlapping and contain a lot + * of duplicated information when later concatenated together into a single + * binary. This algorithm ensures that each unique type is represented by single + * BTF type descriptor, greatly reducing resulting size of BTF data. + * + * Compilation unit isolation and subsequent duplication of data is not the only + * problem. The same type hierarchy (e.g., struct and all the type that struct + * references) in different compilation units can be represented in BTF to + * various degrees of completeness (or, rather, incompleteness) due to + * struct/union forward declarations. + * + * Let's take a look at an example, that we'll use to better understand the + * problem (and solution). Suppose we have two compilation units, each using + * same `struct S`, but each of them having incomplete type information about + * struct's fields: + * + * // CU #1: + * struct S; + * struct A { + * int a; + * struct A* self; + * struct S* parent; + * }; + * struct B; + * struct S { + * struct A* a_ptr; + * struct B* b_ptr; + * }; + * + * // CU #2: + * struct S; + * struct A; + * struct B { + * int b; + * struct B* self; + * struct S* parent; + * }; + * struct S { + * struct A* a_ptr; + * struct B* b_ptr; + * }; + * + * In case of CU #1, BTF data will know only that `struct B` exist (but no + * more), but will know the complete type information about `struct A`. While + * for CU #2, it will know full type information about `struct B`, but will + * only know about forward declaration of `struct A` (in BTF terms, it will + * have `BTF_KIND_FWD` type descriptor with name `B`). + * + * This compilation unit isolation means that it's possible that there is no + * single CU with complete type information describing structs `S`, `A`, and + * `B`. Also, we might get tons of duplicated and redundant type information. + * + * Additional complication we need to keep in mind comes from the fact that + * types, in general, can form graphs containing cycles, not just DAGs. + * + * While algorithm does deduplication, it also merges and resolves type + * information (unless disabled throught `struct btf_opts`), whenever possible. + * E.g., in the example above with two compilation units having partial type + * information for structs `A` and `B`, the output of algorithm will emit + * a single copy of each BTF type that describes structs `A`, `B`, and `S` + * (as well as type information for `int` and pointers), as if they were defined + * in a single compilation unit as: + * + * struct A { + * int a; + * struct A* self; + * struct S* parent; + * }; + * struct B { + * int b; + * struct B* self; + * struct S* parent; + * }; + * struct S { + * struct A* a_ptr; + * struct B* b_ptr; + * }; + * + * Algorithm summary + * ================= + * + * Algorithm completes its work in 6 separate passes: + * + * 1. Strings deduplication. + * 2. Primitive types deduplication (int, enum, fwd). + * 3. Struct/union types deduplication. + * 4. Reference types deduplication (pointers, typedefs, arrays, funcs, func + * protos, and const/volatile/restrict modifiers). + * 5. Types compaction. + * 6. Types remapping. + * + * Algorithm determines canonical type descriptor, which is a single + * representative type for each truly unique type. This canonical type is the + * one that will go into final deduplicated BTF type information. For + * struct/unions, it is also the type that algorithm will merge additional type + * information into (while resolving FWDs), as it discovers it from data in + * other CUs. Each input BTF type eventually gets either mapped to itself, if + * that type is canonical, or to some other type, if that type is equivalent + * and was chosen as canonical representative. This mapping is stored in + * `btf_dedup->map` array. This map is also used to record STRUCT/UNION that + * FWD type got resolved to. + * + * To facilitate fast discovery of canonical types, we also maintain canonical + * index (`btf_dedup->dedup_table`), which maps type descriptor's signature hash + * (i.e., hashed kind, name, size, fields, etc) into a list of canonical types + * that match that signature. With sufficiently good choice of type signature + * hashing function, we can limit number of canonical types for each unique type + * signature to a very small number, allowing to find canonical type for any + * duplicated type very quickly. + * + * Struct/union deduplication is the most critical part and algorithm for + * deduplicating structs/unions is described in greater details in comments for + * `btf_dedup_is_equiv` function. + */ +int btf__dedup(struct btf *btf, struct btf_ext *btf_ext, + const struct btf_dedup_opts *opts) +{ + struct btf_dedup *d = btf_dedup_new(btf, btf_ext, opts); + int err; + + if (IS_ERR(d)) { + pr_debug("btf_dedup_new failed: %ld", PTR_ERR(d)); + return -EINVAL; + } + + err = btf_dedup_strings(d); + if (err < 0) { + pr_debug("btf_dedup_strings failed:%d\n", err); + goto done; + } + err = btf_dedup_prim_types(d); + if (err < 0) { + pr_debug("btf_dedup_prim_types failed:%d\n", err); + goto done; + } + err = btf_dedup_struct_types(d); + if (err < 0) { + pr_debug("btf_dedup_struct_types failed:%d\n", err); + goto done; + } + err = btf_dedup_ref_types(d); + if (err < 0) { + pr_debug("btf_dedup_ref_types failed:%d\n", err); + goto done; + } + err = btf_dedup_compact_types(d); + if (err < 0) { + pr_debug("btf_dedup_compact_types failed:%d\n", err); + goto done; + } + err = btf_dedup_remap_types(d); + if (err < 0) { + pr_debug("btf_dedup_remap_types failed:%d\n", err); + goto done; + } + +done: + btf_dedup_free(d); + return err; +} + +#define BTF_UNPROCESSED_ID ((__u32)-1) +#define BTF_IN_PROGRESS_ID ((__u32)-2) + +struct btf_dedup { + /* .BTF section to be deduped in-place */ + struct btf *btf; + /* + * Optional .BTF.ext section. When provided, any strings referenced + * from it will be taken into account when deduping strings + */ + struct btf_ext *btf_ext; + /* + * This is a map from any type's signature hash to a list of possible + * canonical representative type candidates. Hash collisions are + * ignored, so even types of various kinds can share same list of + * candidates, which is fine because we rely on subsequent + * btf_xxx_equal() checks to authoritatively verify type equality. + */ + struct hashmap *dedup_table; + /* Canonical types map */ + __u32 *map; + /* Hypothetical mapping, used during type graph equivalence checks */ + __u32 *hypot_map; + __u32 *hypot_list; + size_t hypot_cnt; + size_t hypot_cap; + /* Various option modifying behavior of algorithm */ + struct btf_dedup_opts opts; +}; + +struct btf_str_ptr { + const char *str; + __u32 new_off; + bool used; +}; + +struct btf_str_ptrs { + struct btf_str_ptr *ptrs; + const char *data; + __u32 cnt; + __u32 cap; +}; + +static long hash_combine(long h, long value) +{ + return h * 31 + value; +} + +#define for_each_dedup_cand(d, node, hash) \ + hashmap__for_each_key_entry(d->dedup_table, node, (void *)hash) + +static int btf_dedup_table_add(struct btf_dedup *d, long hash, __u32 type_id) +{ + return hashmap__append(d->dedup_table, + (void *)hash, (void *)(long)type_id); +} + +static int btf_dedup_hypot_map_add(struct btf_dedup *d, + __u32 from_id, __u32 to_id) +{ + if (d->hypot_cnt == d->hypot_cap) { + __u32 *new_list; + + d->hypot_cap += max(16, d->hypot_cap / 2); + new_list = realloc(d->hypot_list, sizeof(__u32) * d->hypot_cap); + if (!new_list) + return -ENOMEM; + d->hypot_list = new_list; + } + d->hypot_list[d->hypot_cnt++] = from_id; + d->hypot_map[from_id] = to_id; + return 0; +} + +static void btf_dedup_clear_hypot_map(struct btf_dedup *d) +{ + int i; + + for (i = 0; i < d->hypot_cnt; i++) + d->hypot_map[d->hypot_list[i]] = BTF_UNPROCESSED_ID; + d->hypot_cnt = 0; +} + +static void btf_dedup_free(struct btf_dedup *d) +{ + hashmap__free(d->dedup_table); + d->dedup_table = NULL; + + free(d->map); + d->map = NULL; + + free(d->hypot_map); + d->hypot_map = NULL; + + free(d->hypot_list); + d->hypot_list = NULL; + + free(d); +} + +static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx) +{ + return (size_t)key; +} + +static size_t btf_dedup_collision_hash_fn(const void *key, void *ctx) +{ + return 0; +} + +static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx) +{ + return k1 == k2; +} + +static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, + const struct btf_dedup_opts *opts) +{ + struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup)); + hashmap_hash_fn hash_fn = btf_dedup_identity_hash_fn; + int i, err = 0; + + if (!d) + return ERR_PTR(-ENOMEM); + + d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds; + /* dedup_table_size is now used only to force collisions in tests */ + if (opts && opts->dedup_table_size == 1) + hash_fn = btf_dedup_collision_hash_fn; + + d->btf = btf; + d->btf_ext = btf_ext; + + d->dedup_table = hashmap__new(hash_fn, btf_dedup_equal_fn, NULL); + if (IS_ERR(d->dedup_table)) { + err = PTR_ERR(d->dedup_table); + d->dedup_table = NULL; + goto done; + } + + d->map = malloc(sizeof(__u32) * (1 + btf->nr_types)); + if (!d->map) { + err = -ENOMEM; + goto done; + } + /* special BTF "void" type is made canonical immediately */ + d->map[0] = 0; + for (i = 1; i <= btf->nr_types; i++) { + struct btf_type *t = d->btf->types[i]; + + /* VAR and DATASEC are never deduped and are self-canonical */ + if (btf_is_var(t) || btf_is_datasec(t)) + d->map[i] = i; + else + d->map[i] = BTF_UNPROCESSED_ID; + } + + d->hypot_map = malloc(sizeof(__u32) * (1 + btf->nr_types)); + if (!d->hypot_map) { + err = -ENOMEM; + goto done; + } + for (i = 0; i <= btf->nr_types; i++) + d->hypot_map[i] = BTF_UNPROCESSED_ID; + +done: + if (err) { + btf_dedup_free(d); + return ERR_PTR(err); + } + + return d; +} + +typedef int (*str_off_fn_t)(__u32 *str_off_ptr, void *ctx); + +/* + * Iterate over all possible places in .BTF and .BTF.ext that can reference + * string and pass pointer to it to a provided callback `fn`. + */ +static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx) +{ + void *line_data_cur, *line_data_end; + int i, j, r, rec_size; + struct btf_type *t; + + for (i = 1; i <= d->btf->nr_types; i++) { + t = d->btf->types[i]; + r = fn(&t->name_off, ctx); + if (r) + return r; + + switch (btf_kind(t)) { + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + struct btf_member *m = btf_members(t); + __u16 vlen = btf_vlen(t); + + for (j = 0; j < vlen; j++) { + r = fn(&m->name_off, ctx); + if (r) + return r; + m++; + } + break; + } + case BTF_KIND_ENUM: { + struct btf_enum *m = btf_enum(t); + __u16 vlen = btf_vlen(t); + + for (j = 0; j < vlen; j++) { + r = fn(&m->name_off, ctx); + if (r) + return r; + m++; + } + break; + } + case BTF_KIND_FUNC_PROTO: { + struct btf_param *m = btf_params(t); + __u16 vlen = btf_vlen(t); + + for (j = 0; j < vlen; j++) { + r = fn(&m->name_off, ctx); + if (r) + return r; + m++; + } + break; + } + default: + break; + } + } + + if (!d->btf_ext) + return 0; + + line_data_cur = d->btf_ext->line_info.info; + line_data_end = d->btf_ext->line_info.info + d->btf_ext->line_info.len; + rec_size = d->btf_ext->line_info.rec_size; + + while (line_data_cur < line_data_end) { + struct btf_ext_info_sec *sec = line_data_cur; + struct bpf_line_info_min *line_info; + __u32 num_info = sec->num_info; + + r = fn(&sec->sec_name_off, ctx); + if (r) + return r; + + line_data_cur += sizeof(struct btf_ext_info_sec); + for (i = 0; i < num_info; i++) { + line_info = line_data_cur; + r = fn(&line_info->file_name_off, ctx); + if (r) + return r; + r = fn(&line_info->line_off, ctx); + if (r) + return r; + line_data_cur += rec_size; + } + } + + return 0; +} + +static int str_sort_by_content(const void *a1, const void *a2) +{ + const struct btf_str_ptr *p1 = a1; + const struct btf_str_ptr *p2 = a2; + + return strcmp(p1->str, p2->str); +} + +static int str_sort_by_offset(const void *a1, const void *a2) +{ + const struct btf_str_ptr *p1 = a1; + const struct btf_str_ptr *p2 = a2; + + if (p1->str != p2->str) + return p1->str < p2->str ? -1 : 1; + return 0; +} + +static int btf_dedup_str_ptr_cmp(const void *str_ptr, const void *pelem) +{ + const struct btf_str_ptr *p = pelem; + + if (str_ptr != p->str) + return (const char *)str_ptr < p->str ? -1 : 1; + return 0; +} + +static int btf_str_mark_as_used(__u32 *str_off_ptr, void *ctx) +{ + struct btf_str_ptrs *strs; + struct btf_str_ptr *s; + + if (*str_off_ptr == 0) + return 0; + + strs = ctx; + s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt, + sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp); + if (!s) + return -EINVAL; + s->used = true; + return 0; +} + +static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx) +{ + struct btf_str_ptrs *strs; + struct btf_str_ptr *s; + + if (*str_off_ptr == 0) + return 0; + + strs = ctx; + s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt, + sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp); + if (!s) + return -EINVAL; + *str_off_ptr = s->new_off; + return 0; +} + +/* + * Dedup string and filter out those that are not referenced from either .BTF + * or .BTF.ext (if provided) sections. + * + * This is done by building index of all strings in BTF's string section, + * then iterating over all entities that can reference strings (e.g., type + * names, struct field names, .BTF.ext line info, etc) and marking corresponding + * strings as used. After that all used strings are deduped and compacted into + * sequential blob of memory and new offsets are calculated. Then all the string + * references are iterated again and rewritten using new offsets. + */ +static int btf_dedup_strings(struct btf_dedup *d) +{ + const struct btf_header *hdr = d->btf->hdr; + char *start = (char *)d->btf->nohdr_data + hdr->str_off; + char *end = start + d->btf->hdr->str_len; + char *p = start, *tmp_strs = NULL; + struct btf_str_ptrs strs = { + .cnt = 0, + .cap = 0, + .ptrs = NULL, + .data = start, + }; + int i, j, err = 0, grp_idx; + bool grp_used; + + /* build index of all strings */ + while (p < end) { + if (strs.cnt + 1 > strs.cap) { + struct btf_str_ptr *new_ptrs; + + strs.cap += max(strs.cnt / 2, 16); + new_ptrs = realloc(strs.ptrs, + sizeof(strs.ptrs[0]) * strs.cap); + if (!new_ptrs) { + err = -ENOMEM; + goto done; + } + strs.ptrs = new_ptrs; + } + + strs.ptrs[strs.cnt].str = p; + strs.ptrs[strs.cnt].used = false; + + p += strlen(p) + 1; + strs.cnt++; + } + + /* temporary storage for deduplicated strings */ + tmp_strs = malloc(d->btf->hdr->str_len); + if (!tmp_strs) { + err = -ENOMEM; + goto done; + } + + /* mark all used strings */ + strs.ptrs[0].used = true; + err = btf_for_each_str_off(d, btf_str_mark_as_used, &strs); + if (err) + goto done; + + /* sort strings by context, so that we can identify duplicates */ + qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_content); + + /* + * iterate groups of equal strings and if any instance in a group was + * referenced, emit single instance and remember new offset + */ + p = tmp_strs; + grp_idx = 0; + grp_used = strs.ptrs[0].used; + /* iterate past end to avoid code duplication after loop */ + for (i = 1; i <= strs.cnt; i++) { + /* + * when i == strs.cnt, we want to skip string comparison and go + * straight to handling last group of strings (otherwise we'd + * need to handle last group after the loop w/ duplicated code) + */ + if (i < strs.cnt && + !strcmp(strs.ptrs[i].str, strs.ptrs[grp_idx].str)) { + grp_used = grp_used || strs.ptrs[i].used; + continue; + } + + /* + * this check would have been required after the loop to handle + * last group of strings, but due to <= condition in a loop + * we avoid that duplication + */ + if (grp_used) { + int new_off = p - tmp_strs; + __u32 len = strlen(strs.ptrs[grp_idx].str); + + memmove(p, strs.ptrs[grp_idx].str, len + 1); + for (j = grp_idx; j < i; j++) + strs.ptrs[j].new_off = new_off; + p += len + 1; + } + + if (i < strs.cnt) { + grp_idx = i; + grp_used = strs.ptrs[i].used; + } + } + + /* replace original strings with deduped ones */ + d->btf->hdr->str_len = p - tmp_strs; + memmove(start, tmp_strs, d->btf->hdr->str_len); + end = start + d->btf->hdr->str_len; + + /* restore original order for further binary search lookups */ + qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_offset); + + /* remap string offsets */ + err = btf_for_each_str_off(d, btf_str_remap_offset, &strs); + if (err) + goto done; + + d->btf->hdr->str_len = end - start; + +done: + free(tmp_strs); + free(strs.ptrs); + return err; +} + +static long btf_hash_common(struct btf_type *t) +{ + long h; + + h = hash_combine(0, t->name_off); + h = hash_combine(h, t->info); + h = hash_combine(h, t->size); + return h; +} + +static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2) +{ + return t1->name_off == t2->name_off && + t1->info == t2->info && + t1->size == t2->size; +} + +/* Calculate type signature hash of INT. */ +static long btf_hash_int(struct btf_type *t) +{ + __u32 info = *(__u32 *)(t + 1); + long h; + + h = btf_hash_common(t); + h = hash_combine(h, info); + return h; +} + +/* Check structural equality of two INTs. */ +static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2) +{ + __u32 info1, info2; + + if (!btf_equal_common(t1, t2)) + return false; + info1 = *(__u32 *)(t1 + 1); + info2 = *(__u32 *)(t2 + 1); + return info1 == info2; +} + +/* Calculate type signature hash of ENUM. */ +static long btf_hash_enum(struct btf_type *t) +{ + long h; + + /* don't hash vlen and enum members to support enum fwd resolving */ + h = hash_combine(0, t->name_off); + h = hash_combine(h, t->info & ~0xffff); + h = hash_combine(h, t->size); + return h; +} + +/* Check structural equality of two ENUMs. */ +static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2) +{ + const struct btf_enum *m1, *m2; + __u16 vlen; + int i; + + if (!btf_equal_common(t1, t2)) + return false; + + vlen = btf_vlen(t1); + m1 = btf_enum(t1); + m2 = btf_enum(t2); + for (i = 0; i < vlen; i++) { + if (m1->name_off != m2->name_off || m1->val != m2->val) + return false; + m1++; + m2++; + } + return true; +} + +static inline bool btf_is_enum_fwd(struct btf_type *t) +{ + return btf_is_enum(t) && btf_vlen(t) == 0; +} + +static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2) +{ + if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2)) + return btf_equal_enum(t1, t2); + /* ignore vlen when comparing */ + return t1->name_off == t2->name_off && + (t1->info & ~0xffff) == (t2->info & ~0xffff) && + t1->size == t2->size; +} + +/* + * Calculate type signature hash of STRUCT/UNION, ignoring referenced type IDs, + * as referenced type IDs equivalence is established separately during type + * graph equivalence check algorithm. + */ +static long btf_hash_struct(struct btf_type *t) +{ + const struct btf_member *member = btf_members(t); + __u32 vlen = btf_vlen(t); + long h = btf_hash_common(t); + int i; + + for (i = 0; i < vlen; i++) { + h = hash_combine(h, member->name_off); + h = hash_combine(h, member->offset); + /* no hashing of referenced type ID, it can be unresolved yet */ + member++; + } + return h; +} + +/* + * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type + * IDs. This check is performed during type graph equivalence check and + * referenced types equivalence is checked separately. + */ +static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2) +{ + const struct btf_member *m1, *m2; + __u16 vlen; + int i; + + if (!btf_equal_common(t1, t2)) + return false; + + vlen = btf_vlen(t1); + m1 = btf_members(t1); + m2 = btf_members(t2); + for (i = 0; i < vlen; i++) { + if (m1->name_off != m2->name_off || m1->offset != m2->offset) + return false; + m1++; + m2++; + } + return true; +} + +/* + * Calculate type signature hash of ARRAY, including referenced type IDs, + * under assumption that they were already resolved to canonical type IDs and + * are not going to change. + */ +static long btf_hash_array(struct btf_type *t) +{ + const struct btf_array *info = btf_array(t); + long h = btf_hash_common(t); + + h = hash_combine(h, info->type); + h = hash_combine(h, info->index_type); + h = hash_combine(h, info->nelems); + return h; +} + +/* + * Check exact equality of two ARRAYs, taking into account referenced + * type IDs, under assumption that they were already resolved to canonical + * type IDs and are not going to change. + * This function is called during reference types deduplication to compare + * ARRAY to potential canonical representative. + */ +static bool btf_equal_array(struct btf_type *t1, struct btf_type *t2) +{ + const struct btf_array *info1, *info2; + + if (!btf_equal_common(t1, t2)) + return false; + + info1 = btf_array(t1); + info2 = btf_array(t2); + return info1->type == info2->type && + info1->index_type == info2->index_type && + info1->nelems == info2->nelems; +} + +/* + * Check structural compatibility of two ARRAYs, ignoring referenced type + * IDs. This check is performed during type graph equivalence check and + * referenced types equivalence is checked separately. + */ +static bool btf_compat_array(struct btf_type *t1, struct btf_type *t2) +{ + if (!btf_equal_common(t1, t2)) + return false; + + return btf_array(t1)->nelems == btf_array(t2)->nelems; +} + +/* + * Calculate type signature hash of FUNC_PROTO, including referenced type IDs, + * under assumption that they were already resolved to canonical type IDs and + * are not going to change. + */ +static long btf_hash_fnproto(struct btf_type *t) +{ + const struct btf_param *member = btf_params(t); + __u16 vlen = btf_vlen(t); + long h = btf_hash_common(t); + int i; + + for (i = 0; i < vlen; i++) { + h = hash_combine(h, member->name_off); + h = hash_combine(h, member->type); + member++; + } + return h; +} + +/* + * Check exact equality of two FUNC_PROTOs, taking into account referenced + * type IDs, under assumption that they were already resolved to canonical + * type IDs and are not going to change. + * This function is called during reference types deduplication to compare + * FUNC_PROTO to potential canonical representative. + */ +static bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2) +{ + const struct btf_param *m1, *m2; + __u16 vlen; + int i; + + if (!btf_equal_common(t1, t2)) + return false; + + vlen = btf_vlen(t1); + m1 = btf_params(t1); + m2 = btf_params(t2); + for (i = 0; i < vlen; i++) { + if (m1->name_off != m2->name_off || m1->type != m2->type) + return false; + m1++; + m2++; + } + return true; +} + +/* + * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type + * IDs. This check is performed during type graph equivalence check and + * referenced types equivalence is checked separately. + */ +static bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2) +{ + const struct btf_param *m1, *m2; + __u16 vlen; + int i; + + /* skip return type ID */ + if (t1->name_off != t2->name_off || t1->info != t2->info) + return false; + + vlen = btf_vlen(t1); + m1 = btf_params(t1); + m2 = btf_params(t2); + for (i = 0; i < vlen; i++) { + if (m1->name_off != m2->name_off) + return false; + m1++; + m2++; + } + return true; +} + +/* + * Deduplicate primitive types, that can't reference other types, by calculating + * their type signature hash and comparing them with any possible canonical + * candidate. If no canonical candidate matches, type itself is marked as + * canonical and is added into `btf_dedup->dedup_table` as another candidate. + */ +static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) +{ + struct btf_type *t = d->btf->types[type_id]; + struct hashmap_entry *hash_entry; + struct btf_type *cand; + /* if we don't find equivalent type, then we are canonical */ + __u32 new_id = type_id; + __u32 cand_id; + long h; + + switch (btf_kind(t)) { + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_ARRAY: + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_FUNC: + case BTF_KIND_FUNC_PROTO: + case BTF_KIND_VAR: + case BTF_KIND_DATASEC: + return 0; + + case BTF_KIND_INT: + h = btf_hash_int(t); + for_each_dedup_cand(d, hash_entry, h) { + cand_id = (__u32)(long)hash_entry->value; + cand = d->btf->types[cand_id]; + if (btf_equal_int(t, cand)) { + new_id = cand_id; + break; + } + } + break; + + case BTF_KIND_ENUM: + h = btf_hash_enum(t); + for_each_dedup_cand(d, hash_entry, h) { + cand_id = (__u32)(long)hash_entry->value; + cand = d->btf->types[cand_id]; + if (btf_equal_enum(t, cand)) { + new_id = cand_id; + break; + } + if (d->opts.dont_resolve_fwds) + continue; + if (btf_compat_enum(t, cand)) { + if (btf_is_enum_fwd(t)) { + /* resolve fwd to full enum */ + new_id = cand_id; + break; + } + /* resolve canonical enum fwd to full enum */ + d->map[cand_id] = type_id; + } + } + break; + + case BTF_KIND_FWD: + h = btf_hash_common(t); + for_each_dedup_cand(d, hash_entry, h) { + cand_id = (__u32)(long)hash_entry->value; + cand = d->btf->types[cand_id]; + if (btf_equal_common(t, cand)) { + new_id = cand_id; + break; + } + } + break; + + default: + return -EINVAL; + } + + d->map[type_id] = new_id; + if (type_id == new_id && btf_dedup_table_add(d, h, type_id)) + return -ENOMEM; + + return 0; +} + +static int btf_dedup_prim_types(struct btf_dedup *d) +{ + int i, err; + + for (i = 1; i <= d->btf->nr_types; i++) { + err = btf_dedup_prim_type(d, i); + if (err) + return err; + } + return 0; +} + +/* + * Check whether type is already mapped into canonical one (could be to itself). + */ +static inline bool is_type_mapped(struct btf_dedup *d, uint32_t type_id) +{ + return d->map[type_id] <= BTF_MAX_NR_TYPES; +} + +/* + * Resolve type ID into its canonical type ID, if any; otherwise return original + * type ID. If type is FWD and is resolved into STRUCT/UNION already, follow + * STRUCT/UNION link and resolve it into canonical type ID as well. + */ +static inline __u32 resolve_type_id(struct btf_dedup *d, __u32 type_id) +{ + while (is_type_mapped(d, type_id) && d->map[type_id] != type_id) + type_id = d->map[type_id]; + return type_id; +} + +/* + * Resolve FWD to underlying STRUCT/UNION, if any; otherwise return original + * type ID. + */ +static uint32_t resolve_fwd_id(struct btf_dedup *d, uint32_t type_id) +{ + __u32 orig_type_id = type_id; + + if (!btf_is_fwd(d->btf->types[type_id])) + return type_id; + + while (is_type_mapped(d, type_id) && d->map[type_id] != type_id) + type_id = d->map[type_id]; + + if (!btf_is_fwd(d->btf->types[type_id])) + return type_id; + + return orig_type_id; +} + + +static inline __u16 btf_fwd_kind(struct btf_type *t) +{ + return btf_kflag(t) ? BTF_KIND_UNION : BTF_KIND_STRUCT; +} + +/* + * Check equivalence of BTF type graph formed by candidate struct/union (we'll + * call it "candidate graph" in this description for brevity) to a type graph + * formed by (potential) canonical struct/union ("canonical graph" for brevity + * here, though keep in mind that not all types in canonical graph are + * necessarily canonical representatives themselves, some of them might be + * duplicates or its uniqueness might not have been established yet). + * Returns: + * - >0, if type graphs are equivalent; + * - 0, if not equivalent; + * - <0, on error. + * + * Algorithm performs side-by-side DFS traversal of both type graphs and checks + * equivalence of BTF types at each step. If at any point BTF types in candidate + * and canonical graphs are not compatible structurally, whole graphs are + * incompatible. If types are structurally equivalent (i.e., all information + * except referenced type IDs is exactly the same), a mapping from `canon_id` to + * a `cand_id` is recored in hypothetical mapping (`btf_dedup->hypot_map`). + * If a type references other types, then those referenced types are checked + * for equivalence recursively. + * + * During DFS traversal, if we find that for current `canon_id` type we + * already have some mapping in hypothetical map, we check for two possible + * situations: + * - `canon_id` is mapped to exactly the same type as `cand_id`. This will + * happen when type graphs have cycles. In this case we assume those two + * types are equivalent. + * - `canon_id` is mapped to different type. This is contradiction in our + * hypothetical mapping, because same graph in canonical graph corresponds + * to two different types in candidate graph, which for equivalent type + * graphs shouldn't happen. This condition terminates equivalence check + * with negative result. + * + * If type graphs traversal exhausts types to check and find no contradiction, + * then type graphs are equivalent. + * + * When checking types for equivalence, there is one special case: FWD types. + * If FWD type resolution is allowed and one of the types (either from canonical + * or candidate graph) is FWD and other is STRUCT/UNION (depending on FWD's kind + * flag) and their names match, hypothetical mapping is updated to point from + * FWD to STRUCT/UNION. If graphs will be determined as equivalent successfully, + * this mapping will be used to record FWD -> STRUCT/UNION mapping permanently. + * + * Technically, this could lead to incorrect FWD to STRUCT/UNION resolution, + * if there are two exactly named (or anonymous) structs/unions that are + * compatible structurally, one of which has FWD field, while other is concrete + * STRUCT/UNION, but according to C sources they are different structs/unions + * that are referencing different types with the same name. This is extremely + * unlikely to happen, but btf_dedup API allows to disable FWD resolution if + * this logic is causing problems. + * + * Doing FWD resolution means that both candidate and/or canonical graphs can + * consists of portions of the graph that come from multiple compilation units. + * This is due to the fact that types within single compilation unit are always + * deduplicated and FWDs are already resolved, if referenced struct/union + * definiton is available. So, if we had unresolved FWD and found corresponding + * STRUCT/UNION, they will be from different compilation units. This + * consequently means that when we "link" FWD to corresponding STRUCT/UNION, + * type graph will likely have at least two different BTF types that describe + * same type (e.g., most probably there will be two different BTF types for the + * same 'int' primitive type) and could even have "overlapping" parts of type + * graph that describe same subset of types. + * + * This in turn means that our assumption that each type in canonical graph + * must correspond to exactly one type in candidate graph might not hold + * anymore and will make it harder to detect contradictions using hypothetical + * map. To handle this problem, we allow to follow FWD -> STRUCT/UNION + * resolution only in canonical graph. FWDs in candidate graphs are never + * resolved. To see why it's OK, let's check all possible situations w.r.t. FWDs + * that can occur: + * - Both types in canonical and candidate graphs are FWDs. If they are + * structurally equivalent, then they can either be both resolved to the + * same STRUCT/UNION or not resolved at all. In both cases they are + * equivalent and there is no need to resolve FWD on candidate side. + * - Both types in canonical and candidate graphs are concrete STRUCT/UNION, + * so nothing to resolve as well, algorithm will check equivalence anyway. + * - Type in canonical graph is FWD, while type in candidate is concrete + * STRUCT/UNION. In this case candidate graph comes from single compilation + * unit, so there is exactly one BTF type for each unique C type. After + * resolving FWD into STRUCT/UNION, there might be more than one BTF type + * in canonical graph mapping to single BTF type in candidate graph, but + * because hypothetical mapping maps from canonical to candidate types, it's + * alright, and we still maintain the property of having single `canon_id` + * mapping to single `cand_id` (there could be two different `canon_id` + * mapped to the same `cand_id`, but it's not contradictory). + * - Type in canonical graph is concrete STRUCT/UNION, while type in candidate + * graph is FWD. In this case we are just going to check compatibility of + * STRUCT/UNION and corresponding FWD, and if they are compatible, we'll + * assume that whatever STRUCT/UNION FWD resolves to must be equivalent to + * a concrete STRUCT/UNION from canonical graph. If the rest of type graphs + * turn out equivalent, we'll re-resolve FWD to concrete STRUCT/UNION from + * canonical graph. + */ +static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, + __u32 canon_id) +{ + struct btf_type *cand_type; + struct btf_type *canon_type; + __u32 hypot_type_id; + __u16 cand_kind; + __u16 canon_kind; + int i, eq; + + /* if both resolve to the same canonical, they must be equivalent */ + if (resolve_type_id(d, cand_id) == resolve_type_id(d, canon_id)) + return 1; + + canon_id = resolve_fwd_id(d, canon_id); + + hypot_type_id = d->hypot_map[canon_id]; + if (hypot_type_id <= BTF_MAX_NR_TYPES) + return hypot_type_id == cand_id; + + if (btf_dedup_hypot_map_add(d, canon_id, cand_id)) + return -ENOMEM; + + cand_type = d->btf->types[cand_id]; + canon_type = d->btf->types[canon_id]; + cand_kind = btf_kind(cand_type); + canon_kind = btf_kind(canon_type); + + if (cand_type->name_off != canon_type->name_off) + return 0; + + /* FWD <--> STRUCT/UNION equivalence check, if enabled */ + if (!d->opts.dont_resolve_fwds + && (cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD) + && cand_kind != canon_kind) { + __u16 real_kind; + __u16 fwd_kind; + + if (cand_kind == BTF_KIND_FWD) { + real_kind = canon_kind; + fwd_kind = btf_fwd_kind(cand_type); + } else { + real_kind = cand_kind; + fwd_kind = btf_fwd_kind(canon_type); + } + return fwd_kind == real_kind; + } + + if (cand_kind != canon_kind) + return 0; + + switch (cand_kind) { + case BTF_KIND_INT: + return btf_equal_int(cand_type, canon_type); + + case BTF_KIND_ENUM: + if (d->opts.dont_resolve_fwds) + return btf_equal_enum(cand_type, canon_type); + else + return btf_compat_enum(cand_type, canon_type); + + case BTF_KIND_FWD: + return btf_equal_common(cand_type, canon_type); + + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + if (cand_type->info != canon_type->info) + return 0; + return btf_dedup_is_equiv(d, cand_type->type, canon_type->type); + + case BTF_KIND_ARRAY: { + const struct btf_array *cand_arr, *canon_arr; + + if (!btf_compat_array(cand_type, canon_type)) + return 0; + cand_arr = btf_array(cand_type); + canon_arr = btf_array(canon_type); + eq = btf_dedup_is_equiv(d, + cand_arr->index_type, canon_arr->index_type); + if (eq <= 0) + return eq; + return btf_dedup_is_equiv(d, cand_arr->type, canon_arr->type); + } + + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + const struct btf_member *cand_m, *canon_m; + __u16 vlen; + + if (!btf_shallow_equal_struct(cand_type, canon_type)) + return 0; + vlen = btf_vlen(cand_type); + cand_m = btf_members(cand_type); + canon_m = btf_members(canon_type); + for (i = 0; i < vlen; i++) { + eq = btf_dedup_is_equiv(d, cand_m->type, canon_m->type); + if (eq <= 0) + return eq; + cand_m++; + canon_m++; + } + + return 1; + } + + case BTF_KIND_FUNC_PROTO: { + const struct btf_param *cand_p, *canon_p; + __u16 vlen; + + if (!btf_compat_fnproto(cand_type, canon_type)) + return 0; + eq = btf_dedup_is_equiv(d, cand_type->type, canon_type->type); + if (eq <= 0) + return eq; + vlen = btf_vlen(cand_type); + cand_p = btf_params(cand_type); + canon_p = btf_params(canon_type); + for (i = 0; i < vlen; i++) { + eq = btf_dedup_is_equiv(d, cand_p->type, canon_p->type); + if (eq <= 0) + return eq; + cand_p++; + canon_p++; + } + return 1; + } + + default: + return -EINVAL; + } + return 0; +} + +/* + * Use hypothetical mapping, produced by successful type graph equivalence + * check, to augment existing struct/union canonical mapping, where possible. + * + * If BTF_KIND_FWD resolution is allowed, this mapping is also used to record + * FWD -> STRUCT/UNION correspondence as well. FWD resolution is bidirectional: + * it doesn't matter if FWD type was part of canonical graph or candidate one, + * we are recording the mapping anyway. As opposed to carefulness required + * for struct/union correspondence mapping (described below), for FWD resolution + * it's not important, as by the time that FWD type (reference type) will be + * deduplicated all structs/unions will be deduped already anyway. + * + * Recording STRUCT/UNION mapping is purely a performance optimization and is + * not required for correctness. It needs to be done carefully to ensure that + * struct/union from candidate's type graph is not mapped into corresponding + * struct/union from canonical type graph that itself hasn't been resolved into + * canonical representative. The only guarantee we have is that canonical + * struct/union was determined as canonical and that won't change. But any + * types referenced through that struct/union fields could have been not yet + * resolved, so in case like that it's too early to establish any kind of + * correspondence between structs/unions. + * + * No canonical correspondence is derived for primitive types (they are already + * deduplicated completely already anyway) or reference types (they rely on + * stability of struct/union canonical relationship for equivalence checks). + */ +static void btf_dedup_merge_hypot_map(struct btf_dedup *d) +{ + __u32 cand_type_id, targ_type_id; + __u16 t_kind, c_kind; + __u32 t_id, c_id; + int i; + + for (i = 0; i < d->hypot_cnt; i++) { + cand_type_id = d->hypot_list[i]; + targ_type_id = d->hypot_map[cand_type_id]; + t_id = resolve_type_id(d, targ_type_id); + c_id = resolve_type_id(d, cand_type_id); + t_kind = btf_kind(d->btf->types[t_id]); + c_kind = btf_kind(d->btf->types[c_id]); + /* + * Resolve FWD into STRUCT/UNION. + * It's ok to resolve FWD into STRUCT/UNION that's not yet + * mapped to canonical representative (as opposed to + * STRUCT/UNION <--> STRUCT/UNION mapping logic below), because + * eventually that struct is going to be mapped and all resolved + * FWDs will automatically resolve to correct canonical + * representative. This will happen before ref type deduping, + * which critically depends on stability of these mapping. This + * stability is not a requirement for STRUCT/UNION equivalence + * checks, though. + */ + if (t_kind != BTF_KIND_FWD && c_kind == BTF_KIND_FWD) + d->map[c_id] = t_id; + else if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD) + d->map[t_id] = c_id; + + if ((t_kind == BTF_KIND_STRUCT || t_kind == BTF_KIND_UNION) && + c_kind != BTF_KIND_FWD && + is_type_mapped(d, c_id) && + !is_type_mapped(d, t_id)) { + /* + * as a perf optimization, we can map struct/union + * that's part of type graph we just verified for + * equivalence. We can do that for struct/union that has + * canonical representative only, though. + */ + d->map[t_id] = c_id; + } + } +} + +/* + * Deduplicate struct/union types. + * + * For each struct/union type its type signature hash is calculated, taking + * into account type's name, size, number, order and names of fields, but + * ignoring type ID's referenced from fields, because they might not be deduped + * completely until after reference types deduplication phase. This type hash + * is used to iterate over all potential canonical types, sharing same hash. + * For each canonical candidate we check whether type graphs that they form + * (through referenced types in fields and so on) are equivalent using algorithm + * implemented in `btf_dedup_is_equiv`. If such equivalence is found and + * BTF_KIND_FWD resolution is allowed, then hypothetical mapping + * (btf_dedup->hypot_map) produced by aforementioned type graph equivalence + * algorithm is used to record FWD -> STRUCT/UNION mapping. It's also used to + * potentially map other structs/unions to their canonical representatives, + * if such relationship hasn't yet been established. This speeds up algorithm + * by eliminating some of the duplicate work. + * + * If no matching canonical representative was found, struct/union is marked + * as canonical for itself and is added into btf_dedup->dedup_table hash map + * for further look ups. + */ +static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id) +{ + struct btf_type *cand_type, *t; + struct hashmap_entry *hash_entry; + /* if we don't find equivalent type, then we are canonical */ + __u32 new_id = type_id; + __u16 kind; + long h; + + /* already deduped or is in process of deduping (loop detected) */ + if (d->map[type_id] <= BTF_MAX_NR_TYPES) + return 0; + + t = d->btf->types[type_id]; + kind = btf_kind(t); + + if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION) + return 0; + + h = btf_hash_struct(t); + for_each_dedup_cand(d, hash_entry, h) { + __u32 cand_id = (__u32)(long)hash_entry->value; + int eq; + + /* + * Even though btf_dedup_is_equiv() checks for + * btf_shallow_equal_struct() internally when checking two + * structs (unions) for equivalence, we need to guard here + * from picking matching FWD type as a dedup candidate. + * This can happen due to hash collision. In such case just + * relying on btf_dedup_is_equiv() would lead to potentially + * creating a loop (FWD -> STRUCT and STRUCT -> FWD), because + * FWD and compatible STRUCT/UNION are considered equivalent. + */ + cand_type = d->btf->types[cand_id]; + if (!btf_shallow_equal_struct(t, cand_type)) + continue; + + btf_dedup_clear_hypot_map(d); + eq = btf_dedup_is_equiv(d, type_id, cand_id); + if (eq < 0) + return eq; + if (!eq) + continue; + new_id = cand_id; + btf_dedup_merge_hypot_map(d); + break; + } + + d->map[type_id] = new_id; + if (type_id == new_id && btf_dedup_table_add(d, h, type_id)) + return -ENOMEM; + + return 0; +} + +static int btf_dedup_struct_types(struct btf_dedup *d) +{ + int i, err; + + for (i = 1; i <= d->btf->nr_types; i++) { + err = btf_dedup_struct_type(d, i); + if (err) + return err; + } + return 0; +} + +/* + * Deduplicate reference type. + * + * Once all primitive and struct/union types got deduplicated, we can easily + * deduplicate all other (reference) BTF types. This is done in two steps: + * + * 1. Resolve all referenced type IDs into their canonical type IDs. This + * resolution can be done either immediately for primitive or struct/union types + * (because they were deduped in previous two phases) or recursively for + * reference types. Recursion will always terminate at either primitive or + * struct/union type, at which point we can "unwind" chain of reference types + * one by one. There is no danger of encountering cycles because in C type + * system the only way to form type cycle is through struct/union, so any chain + * of reference types, even those taking part in a type cycle, will inevitably + * reach struct/union at some point. + * + * 2. Once all referenced type IDs are resolved into canonical ones, BTF type + * becomes "stable", in the sense that no further deduplication will cause + * any changes to it. With that, it's now possible to calculate type's signature + * hash (this time taking into account referenced type IDs) and loop over all + * potential canonical representatives. If no match was found, current type + * will become canonical representative of itself and will be added into + * btf_dedup->dedup_table as another possible canonical representative. + */ +static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) +{ + struct hashmap_entry *hash_entry; + __u32 new_id = type_id, cand_id; + struct btf_type *t, *cand; + /* if we don't find equivalent type, then we are representative type */ + int ref_type_id; + long h; + + if (d->map[type_id] == BTF_IN_PROGRESS_ID) + return -ELOOP; + if (d->map[type_id] <= BTF_MAX_NR_TYPES) + return resolve_type_id(d, type_id); + + t = d->btf->types[type_id]; + d->map[type_id] = BTF_IN_PROGRESS_ID; + + switch (btf_kind(t)) { + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + ref_type_id = btf_dedup_ref_type(d, t->type); + if (ref_type_id < 0) + return ref_type_id; + t->type = ref_type_id; + + h = btf_hash_common(t); + for_each_dedup_cand(d, hash_entry, h) { + cand_id = (__u32)(long)hash_entry->value; + cand = d->btf->types[cand_id]; + if (btf_equal_common(t, cand)) { + new_id = cand_id; + break; + } + } + break; + + case BTF_KIND_ARRAY: { + struct btf_array *info = btf_array(t); + + ref_type_id = btf_dedup_ref_type(d, info->type); + if (ref_type_id < 0) + return ref_type_id; + info->type = ref_type_id; + + ref_type_id = btf_dedup_ref_type(d, info->index_type); + if (ref_type_id < 0) + return ref_type_id; + info->index_type = ref_type_id; + + h = btf_hash_array(t); + for_each_dedup_cand(d, hash_entry, h) { + cand_id = (__u32)(long)hash_entry->value; + cand = d->btf->types[cand_id]; + if (btf_equal_array(t, cand)) { + new_id = cand_id; + break; + } + } + break; + } + + case BTF_KIND_FUNC_PROTO: { + struct btf_param *param; + __u16 vlen; + int i; + + ref_type_id = btf_dedup_ref_type(d, t->type); + if (ref_type_id < 0) + return ref_type_id; + t->type = ref_type_id; + + vlen = btf_vlen(t); + param = btf_params(t); + for (i = 0; i < vlen; i++) { + ref_type_id = btf_dedup_ref_type(d, param->type); + if (ref_type_id < 0) + return ref_type_id; + param->type = ref_type_id; + param++; + } + + h = btf_hash_fnproto(t); + for_each_dedup_cand(d, hash_entry, h) { + cand_id = (__u32)(long)hash_entry->value; + cand = d->btf->types[cand_id]; + if (btf_equal_fnproto(t, cand)) { + new_id = cand_id; + break; + } + } + break; + } + + default: + return -EINVAL; + } + + d->map[type_id] = new_id; + if (type_id == new_id && btf_dedup_table_add(d, h, type_id)) + return -ENOMEM; + + return new_id; +} + +static int btf_dedup_ref_types(struct btf_dedup *d) +{ + int i, err; + + for (i = 1; i <= d->btf->nr_types; i++) { + err = btf_dedup_ref_type(d, i); + if (err < 0) + return err; + } + /* we won't need d->dedup_table anymore */ + hashmap__free(d->dedup_table); + d->dedup_table = NULL; + return 0; +} + +/* + * Compact types. + * + * After we established for each type its corresponding canonical representative + * type, we now can eliminate types that are not canonical and leave only + * canonical ones layed out sequentially in memory by copying them over + * duplicates. During compaction btf_dedup->hypot_map array is reused to store + * a map from original type ID to a new compacted type ID, which will be used + * during next phase to "fix up" type IDs, referenced from struct/union and + * reference types. + */ +static int btf_dedup_compact_types(struct btf_dedup *d) +{ + struct btf_type **new_types; + __u32 next_type_id = 1; + char *types_start, *p; + int i, len; + + /* we are going to reuse hypot_map to store compaction remapping */ + d->hypot_map[0] = 0; + for (i = 1; i <= d->btf->nr_types; i++) + d->hypot_map[i] = BTF_UNPROCESSED_ID; + + types_start = d->btf->nohdr_data + d->btf->hdr->type_off; + p = types_start; + + for (i = 1; i <= d->btf->nr_types; i++) { + if (d->map[i] != i) + continue; + + len = btf_type_size(d->btf->types[i]); + if (len < 0) + return len; + + memmove(p, d->btf->types[i], len); + d->hypot_map[i] = next_type_id; + d->btf->types[next_type_id] = (struct btf_type *)p; + p += len; + next_type_id++; + } + + /* shrink struct btf's internal types index and update btf_header */ + d->btf->nr_types = next_type_id - 1; + d->btf->types_size = d->btf->nr_types; + d->btf->hdr->type_len = p - types_start; + new_types = realloc(d->btf->types, + (1 + d->btf->nr_types) * sizeof(struct btf_type *)); + if (!new_types) + return -ENOMEM; + d->btf->types = new_types; + + /* make sure string section follows type information without gaps */ + d->btf->hdr->str_off = p - (char *)d->btf->nohdr_data; + memmove(p, d->btf->strings, d->btf->hdr->str_len); + d->btf->strings = p; + p += d->btf->hdr->str_len; + + d->btf->data_size = p - (char *)d->btf->data; + return 0; +} + +/* + * Figure out final (deduplicated and compacted) type ID for provided original + * `type_id` by first resolving it into corresponding canonical type ID and + * then mapping it to a deduplicated type ID, stored in btf_dedup->hypot_map, + * which is populated during compaction phase. + */ +static int btf_dedup_remap_type_id(struct btf_dedup *d, __u32 type_id) +{ + __u32 resolved_type_id, new_type_id; + + resolved_type_id = resolve_type_id(d, type_id); + new_type_id = d->hypot_map[resolved_type_id]; + if (new_type_id > BTF_MAX_NR_TYPES) + return -EINVAL; + return new_type_id; +} + +/* + * Remap referenced type IDs into deduped type IDs. + * + * After BTF types are deduplicated and compacted, their final type IDs may + * differ from original ones. The map from original to a corresponding + * deduped type ID is stored in btf_dedup->hypot_map and is populated during + * compaction phase. During remapping phase we are rewriting all type IDs + * referenced from any BTF type (e.g., struct fields, func proto args, etc) to + * their final deduped type IDs. + */ +static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id) +{ + struct btf_type *t = d->btf->types[type_id]; + int i, r; + + switch (btf_kind(t)) { + case BTF_KIND_INT: + case BTF_KIND_ENUM: + break; + + case BTF_KIND_FWD: + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + case BTF_KIND_VAR: + r = btf_dedup_remap_type_id(d, t->type); + if (r < 0) + return r; + t->type = r; + break; + + case BTF_KIND_ARRAY: { + struct btf_array *arr_info = btf_array(t); + + r = btf_dedup_remap_type_id(d, arr_info->type); + if (r < 0) + return r; + arr_info->type = r; + r = btf_dedup_remap_type_id(d, arr_info->index_type); + if (r < 0) + return r; + arr_info->index_type = r; + break; + } + + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + struct btf_member *member = btf_members(t); + __u16 vlen = btf_vlen(t); + + for (i = 0; i < vlen; i++) { + r = btf_dedup_remap_type_id(d, member->type); + if (r < 0) + return r; + member->type = r; + member++; + } + break; + } + + case BTF_KIND_FUNC_PROTO: { + struct btf_param *param = btf_params(t); + __u16 vlen = btf_vlen(t); + + r = btf_dedup_remap_type_id(d, t->type); + if (r < 0) + return r; + t->type = r; + + for (i = 0; i < vlen; i++) { + r = btf_dedup_remap_type_id(d, param->type); + if (r < 0) + return r; + param->type = r; + param++; + } + break; + } + + case BTF_KIND_DATASEC: { + struct btf_var_secinfo *var = btf_var_secinfos(t); + __u16 vlen = btf_vlen(t); + + for (i = 0; i < vlen; i++) { + r = btf_dedup_remap_type_id(d, var->type); + if (r < 0) + return r; + var->type = r; + var++; + } + break; + } + + default: + return -EINVAL; + } + + return 0; +} + +static int btf_dedup_remap_types(struct btf_dedup *d) +{ + int i, r; + + for (i = 1; i <= d->btf->nr_types; i++) { + r = btf_dedup_remap_type(d, i); + if (r < 0) + return r; + } + return 0; +} diff --git a/src/contrib/libbpf/bpf/btf.h b/src/contrib/libbpf/bpf/btf.h new file mode 100644 index 0000000..d9ac73a --- /dev/null +++ b/src/contrib/libbpf/bpf/btf.h @@ -0,0 +1,311 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +/* Copyright (c) 2018 Facebook */ + +#ifndef __LIBBPF_BTF_H +#define __LIBBPF_BTF_H + +#include <stdarg.h> +#include <linux/btf.h> +#include <linux/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef LIBBPF_API +#define LIBBPF_API __attribute__((visibility("default"))) +#endif + +#define BTF_ELF_SEC ".BTF" +#define BTF_EXT_ELF_SEC ".BTF.ext" +#define MAPS_ELF_SEC ".maps" + +struct btf; +struct btf_ext; +struct btf_type; + +struct bpf_object; + +/* + * The .BTF.ext ELF section layout defined as + * struct btf_ext_header + * func_info subsection + * + * The func_info subsection layout: + * record size for struct bpf_func_info in the func_info subsection + * struct btf_sec_func_info for section #1 + * a list of bpf_func_info records for section #1 + * where struct bpf_func_info mimics one in include/uapi/linux/bpf.h + * but may not be identical + * struct btf_sec_func_info for section #2 + * a list of bpf_func_info records for section #2 + * ...... + * + * Note that the bpf_func_info record size in .BTF.ext may not + * be the same as the one defined in include/uapi/linux/bpf.h. + * The loader should ensure that record_size meets minimum + * requirement and pass the record as is to the kernel. The + * kernel will handle the func_info properly based on its contents. + */ +struct btf_ext_header { + __u16 magic; + __u8 version; + __u8 flags; + __u32 hdr_len; + + /* All offsets are in bytes relative to the end of this header */ + __u32 func_info_off; + __u32 func_info_len; + __u32 line_info_off; + __u32 line_info_len; + + /* optional part of .BTF.ext header */ + __u32 field_reloc_off; + __u32 field_reloc_len; +}; + +LIBBPF_API void btf__free(struct btf *btf); +LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size); +LIBBPF_API struct btf *btf__parse_elf(const char *path, + struct btf_ext **btf_ext); +LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf); +LIBBPF_API int btf__load(struct btf *btf); +LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, + const char *type_name); +LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf, + const char *type_name, __u32 kind); +LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf); +LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf, + __u32 id); +LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id); +LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); +LIBBPF_API int btf__fd(const struct btf *btf); +LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size); +LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); +LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); +LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, + __u32 expected_key_size, + __u32 expected_value_size, + __u32 *key_type_id, __u32 *value_type_id); + +LIBBPF_API struct btf_ext *btf_ext__new(__u8 *data, __u32 size); +LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext); +LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, + __u32 *size); +LIBBPF_API int btf_ext__reloc_func_info(const struct btf *btf, + const struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **func_info, __u32 *cnt); +LIBBPF_API int btf_ext__reloc_line_info(const struct btf *btf, + const struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **line_info, __u32 *cnt); +LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext); +LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext); + +struct btf_dedup_opts { + unsigned int dedup_table_size; + bool dont_resolve_fwds; +}; + +LIBBPF_API int btf__dedup(struct btf *btf, struct btf_ext *btf_ext, + const struct btf_dedup_opts *opts); + +struct btf_dump; + +struct btf_dump_opts { + void *ctx; +}; + +typedef void (*btf_dump_printf_fn_t)(void *ctx, const char *fmt, va_list args); + +LIBBPF_API struct btf_dump *btf_dump__new(const struct btf *btf, + const struct btf_ext *btf_ext, + const struct btf_dump_opts *opts, + btf_dump_printf_fn_t printf_fn); +LIBBPF_API void btf_dump__free(struct btf_dump *d); + +LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id); + +/* + * A set of helpers for easier BTF types handling + */ +static inline __u16 btf_kind(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info); +} + +static inline __u16 btf_vlen(const struct btf_type *t) +{ + return BTF_INFO_VLEN(t->info); +} + +static inline bool btf_kflag(const struct btf_type *t) +{ + return BTF_INFO_KFLAG(t->info); +} + +static inline bool btf_is_int(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_INT; +} + +static inline bool btf_is_ptr(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_PTR; +} + +static inline bool btf_is_array(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_ARRAY; +} + +static inline bool btf_is_struct(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_STRUCT; +} + +static inline bool btf_is_union(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_UNION; +} + +static inline bool btf_is_composite(const struct btf_type *t) +{ + __u16 kind = btf_kind(t); + + return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION; +} + +static inline bool btf_is_enum(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_ENUM; +} + +static inline bool btf_is_fwd(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_FWD; +} + +static inline bool btf_is_typedef(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_TYPEDEF; +} + +static inline bool btf_is_volatile(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_VOLATILE; +} + +static inline bool btf_is_const(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_CONST; +} + +static inline bool btf_is_restrict(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_RESTRICT; +} + +static inline bool btf_is_mod(const struct btf_type *t) +{ + __u16 kind = btf_kind(t); + + return kind == BTF_KIND_VOLATILE || + kind == BTF_KIND_CONST || + kind == BTF_KIND_RESTRICT; +} + +static inline bool btf_is_func(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_FUNC; +} + +static inline bool btf_is_func_proto(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_FUNC_PROTO; +} + +static inline bool btf_is_var(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_VAR; +} + +static inline bool btf_is_datasec(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_DATASEC; +} + +static inline __u8 btf_int_encoding(const struct btf_type *t) +{ + return BTF_INT_ENCODING(*(__u32 *)(t + 1)); +} + +static inline __u8 btf_int_offset(const struct btf_type *t) +{ + return BTF_INT_OFFSET(*(__u32 *)(t + 1)); +} + +static inline __u8 btf_int_bits(const struct btf_type *t) +{ + return BTF_INT_BITS(*(__u32 *)(t + 1)); +} + +static inline struct btf_array *btf_array(const struct btf_type *t) +{ + return (struct btf_array *)(t + 1); +} + +static inline struct btf_enum *btf_enum(const struct btf_type *t) +{ + return (struct btf_enum *)(t + 1); +} + +static inline struct btf_member *btf_members(const struct btf_type *t) +{ + return (struct btf_member *)(t + 1); +} + +/* Get bit offset of a member with specified index. */ +static inline __u32 btf_member_bit_offset(const struct btf_type *t, + __u32 member_idx) +{ + const struct btf_member *m = btf_members(t) + member_idx; + bool kflag = btf_kflag(t); + + return kflag ? BTF_MEMBER_BIT_OFFSET(m->offset) : m->offset; +} +/* + * Get bitfield size of a member, assuming t is BTF_KIND_STRUCT or + * BTF_KIND_UNION. If member is not a bitfield, zero is returned. + */ +static inline __u32 btf_member_bitfield_size(const struct btf_type *t, + __u32 member_idx) +{ + const struct btf_member *m = btf_members(t) + member_idx; + bool kflag = btf_kflag(t); + + return kflag ? BTF_MEMBER_BITFIELD_SIZE(m->offset) : 0; +} + +static inline struct btf_param *btf_params(const struct btf_type *t) +{ + return (struct btf_param *)(t + 1); +} + +static inline struct btf_var *btf_var(const struct btf_type *t) +{ + return (struct btf_var *)(t + 1); +} + +static inline struct btf_var_secinfo * +btf_var_secinfos(const struct btf_type *t) +{ + return (struct btf_var_secinfo *)(t + 1); +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __LIBBPF_BTF_H */ diff --git a/src/contrib/libbpf/bpf/btf_dump.c b/src/contrib/libbpf/bpf/btf_dump.c new file mode 100644 index 0000000..cb126d8 --- /dev/null +++ b/src/contrib/libbpf/bpf/btf_dump.c @@ -0,0 +1,1386 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +/* + * BTF-to-C type converter. + * + * Copyright (c) 2019 Facebook + */ + +#include <stdbool.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <linux/err.h> +#include <linux/btf.h> +#include "btf.h" +#include "hashmap.h" +#include "libbpf.h" +#include "libbpf_internal.h" + +static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t"; +static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1; + +static const char *pfx(int lvl) +{ + return lvl >= PREFIX_CNT ? PREFIXES : &PREFIXES[PREFIX_CNT - lvl]; +} + +enum btf_dump_type_order_state { + NOT_ORDERED, + ORDERING, + ORDERED, +}; + +enum btf_dump_type_emit_state { + NOT_EMITTED, + EMITTING, + EMITTED, +}; + +/* per-type auxiliary state */ +struct btf_dump_type_aux_state { + /* topological sorting state */ + enum btf_dump_type_order_state order_state: 2; + /* emitting state used to determine the need for forward declaration */ + enum btf_dump_type_emit_state emit_state: 2; + /* whether forward declaration was already emitted */ + __u8 fwd_emitted: 1; + /* whether unique non-duplicate name was already assigned */ + __u8 name_resolved: 1; + /* whether type is referenced from any other type */ + __u8 referenced: 1; +}; + +struct btf_dump { + const struct btf *btf; + const struct btf_ext *btf_ext; + btf_dump_printf_fn_t printf_fn; + struct btf_dump_opts opts; + + /* per-type auxiliary state */ + struct btf_dump_type_aux_state *type_states; + /* per-type optional cached unique name, must be freed, if present */ + const char **cached_names; + + /* topo-sorted list of dependent type definitions */ + __u32 *emit_queue; + int emit_queue_cap; + int emit_queue_cnt; + + /* + * stack of type declarations (e.g., chain of modifiers, arrays, + * funcs, etc) + */ + __u32 *decl_stack; + int decl_stack_cap; + int decl_stack_cnt; + + /* maps struct/union/enum name to a number of name occurrences */ + struct hashmap *type_names; + /* + * maps typedef identifiers and enum value names to a number of such + * name occurrences + */ + struct hashmap *ident_names; +}; + +static size_t str_hash_fn(const void *key, void *ctx) +{ + const char *s = key; + size_t h = 0; + + while (*s) { + h = h * 31 + *s; + s++; + } + return h; +} + +static bool str_equal_fn(const void *a, const void *b, void *ctx) +{ + return strcmp(a, b) == 0; +} + +static const char *btf_name_of(const struct btf_dump *d, __u32 name_off) +{ + return btf__name_by_offset(d->btf, name_off); +} + +static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + d->printf_fn(d->opts.ctx, fmt, args); + va_end(args); +} + +struct btf_dump *btf_dump__new(const struct btf *btf, + const struct btf_ext *btf_ext, + const struct btf_dump_opts *opts, + btf_dump_printf_fn_t printf_fn) +{ + struct btf_dump *d; + int err; + + d = calloc(1, sizeof(struct btf_dump)); + if (!d) + return ERR_PTR(-ENOMEM); + + d->btf = btf; + d->btf_ext = btf_ext; + d->printf_fn = printf_fn; + d->opts.ctx = opts ? opts->ctx : NULL; + + d->type_names = hashmap__new(str_hash_fn, str_equal_fn, NULL); + if (IS_ERR(d->type_names)) { + err = PTR_ERR(d->type_names); + d->type_names = NULL; + btf_dump__free(d); + return ERR_PTR(err); + } + d->ident_names = hashmap__new(str_hash_fn, str_equal_fn, NULL); + if (IS_ERR(d->ident_names)) { + err = PTR_ERR(d->ident_names); + d->ident_names = NULL; + btf_dump__free(d); + return ERR_PTR(err); + } + + return d; +} + +void btf_dump__free(struct btf_dump *d) +{ + int i, cnt; + + if (!d) + return; + + free(d->type_states); + if (d->cached_names) { + /* any set cached name is owned by us and should be freed */ + for (i = 0, cnt = btf__get_nr_types(d->btf); i <= cnt; i++) { + if (d->cached_names[i]) + free((void *)d->cached_names[i]); + } + } + free(d->cached_names); + free(d->emit_queue); + free(d->decl_stack); + hashmap__free(d->type_names); + hashmap__free(d->ident_names); + + free(d); +} + +static int btf_dump_mark_referenced(struct btf_dump *d); +static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr); +static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id); + +/* + * Dump BTF type in a compilable C syntax, including all the necessary + * dependent types, necessary for compilation. If some of the dependent types + * were already emitted as part of previous btf_dump__dump_type() invocation + * for another type, they won't be emitted again. This API allows callers to + * filter out BTF types according to user-defined criterias and emitted only + * minimal subset of types, necessary to compile everything. Full struct/union + * definitions will still be emitted, even if the only usage is through + * pointer and could be satisfied with just a forward declaration. + * + * Dumping is done in two high-level passes: + * 1. Topologically sort type definitions to satisfy C rules of compilation. + * 2. Emit type definitions in C syntax. + * + * Returns 0 on success; <0, otherwise. + */ +int btf_dump__dump_type(struct btf_dump *d, __u32 id) +{ + int err, i; + + if (id > btf__get_nr_types(d->btf)) + return -EINVAL; + + /* type states are lazily allocated, as they might not be needed */ + if (!d->type_states) { + d->type_states = calloc(1 + btf__get_nr_types(d->btf), + sizeof(d->type_states[0])); + if (!d->type_states) + return -ENOMEM; + d->cached_names = calloc(1 + btf__get_nr_types(d->btf), + sizeof(d->cached_names[0])); + if (!d->cached_names) + return -ENOMEM; + + /* VOID is special */ + d->type_states[0].order_state = ORDERED; + d->type_states[0].emit_state = EMITTED; + + /* eagerly determine referenced types for anon enums */ + err = btf_dump_mark_referenced(d); + if (err) + return err; + } + + d->emit_queue_cnt = 0; + err = btf_dump_order_type(d, id, false); + if (err < 0) + return err; + + for (i = 0; i < d->emit_queue_cnt; i++) + btf_dump_emit_type(d, d->emit_queue[i], 0 /*top-level*/); + + return 0; +} + +/* + * Mark all types that are referenced from any other type. This is used to + * determine top-level anonymous enums that need to be emitted as an + * independent type declarations. + * Anonymous enums come in two flavors: either embedded in a struct's field + * definition, in which case they have to be declared inline as part of field + * type declaration; or as a top-level anonymous enum, typically used for + * declaring global constants. It's impossible to distinguish between two + * without knowning whether given enum type was referenced from other type: + * top-level anonymous enum won't be referenced by anything, while embedded + * one will. + */ +static int btf_dump_mark_referenced(struct btf_dump *d) +{ + int i, j, n = btf__get_nr_types(d->btf); + const struct btf_type *t; + __u16 vlen; + + for (i = 1; i <= n; i++) { + t = btf__type_by_id(d->btf, i); + vlen = btf_vlen(t); + + switch (btf_kind(t)) { + case BTF_KIND_INT: + case BTF_KIND_ENUM: + case BTF_KIND_FWD: + break; + + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + case BTF_KIND_VAR: + d->type_states[t->type].referenced = 1; + break; + + case BTF_KIND_ARRAY: { + const struct btf_array *a = btf_array(t); + + d->type_states[a->index_type].referenced = 1; + d->type_states[a->type].referenced = 1; + break; + } + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + const struct btf_member *m = btf_members(t); + + for (j = 0; j < vlen; j++, m++) + d->type_states[m->type].referenced = 1; + break; + } + case BTF_KIND_FUNC_PROTO: { + const struct btf_param *p = btf_params(t); + + for (j = 0; j < vlen; j++, p++) + d->type_states[p->type].referenced = 1; + break; + } + case BTF_KIND_DATASEC: { + const struct btf_var_secinfo *v = btf_var_secinfos(t); + + for (j = 0; j < vlen; j++, v++) + d->type_states[v->type].referenced = 1; + break; + } + default: + return -EINVAL; + } + } + return 0; +} +static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id) +{ + __u32 *new_queue; + size_t new_cap; + + if (d->emit_queue_cnt >= d->emit_queue_cap) { + new_cap = max(16, d->emit_queue_cap * 3 / 2); + new_queue = realloc(d->emit_queue, + new_cap * sizeof(new_queue[0])); + if (!new_queue) + return -ENOMEM; + d->emit_queue = new_queue; + d->emit_queue_cap = new_cap; + } + + d->emit_queue[d->emit_queue_cnt++] = id; + return 0; +} + +/* + * Determine order of emitting dependent types and specified type to satisfy + * C compilation rules. This is done through topological sorting with an + * additional complication which comes from C rules. The main idea for C is + * that if some type is "embedded" into a struct/union, it's size needs to be + * known at the time of definition of containing type. E.g., for: + * + * struct A {}; + * struct B { struct A x; } + * + * struct A *HAS* to be defined before struct B, because it's "embedded", + * i.e., it is part of struct B layout. But in the following case: + * + * struct A; + * struct B { struct A *x; } + * struct A {}; + * + * it's enough to just have a forward declaration of struct A at the time of + * struct B definition, as struct B has a pointer to struct A, so the size of + * field x is known without knowing struct A size: it's sizeof(void *). + * + * Unfortunately, there are some trickier cases we need to handle, e.g.: + * + * struct A {}; // if this was forward-declaration: compilation error + * struct B { + * struct { // anonymous struct + * struct A y; + * } *x; + * }; + * + * In this case, struct B's field x is a pointer, so it's size is known + * regardless of the size of (anonymous) struct it points to. But because this + * struct is anonymous and thus defined inline inside struct B, *and* it + * embeds struct A, compiler requires full definition of struct A to be known + * before struct B can be defined. This creates a transitive dependency + * between struct A and struct B. If struct A was forward-declared before + * struct B definition and fully defined after struct B definition, that would + * trigger compilation error. + * + * All this means that while we are doing topological sorting on BTF type + * graph, we need to determine relationships between different types (graph + * nodes): + * - weak link (relationship) between X and Y, if Y *CAN* be + * forward-declared at the point of X definition; + * - strong link, if Y *HAS* to be fully-defined before X can be defined. + * + * The rule is as follows. Given a chain of BTF types from X to Y, if there is + * BTF_KIND_PTR type in the chain and at least one non-anonymous type + * Z (excluding X, including Y), then link is weak. Otherwise, it's strong. + * Weak/strong relationship is determined recursively during DFS traversal and + * is returned as a result from btf_dump_order_type(). + * + * btf_dump_order_type() is trying to avoid unnecessary forward declarations, + * but it is not guaranteeing that no extraneous forward declarations will be + * emitted. + * + * To avoid extra work, algorithm marks some of BTF types as ORDERED, when + * it's done with them, but not for all (e.g., VOLATILE, CONST, RESTRICT, + * ARRAY, FUNC_PROTO), as weak/strong semantics for those depends on the + * entire graph path, so depending where from one came to that BTF type, it + * might cause weak or strong ordering. For types like STRUCT/UNION/INT/ENUM, + * once they are processed, there is no need to do it again, so they are + * marked as ORDERED. We can mark PTR as ORDERED as well, as it semi-forces + * weak link, unless subsequent referenced STRUCT/UNION/ENUM is anonymous. But + * in any case, once those are processed, no need to do it again, as the + * result won't change. + * + * Returns: + * - 1, if type is part of strong link (so there is strong topological + * ordering requirements); + * - 0, if type is part of weak link (so can be satisfied through forward + * declaration); + * - <0, on error (e.g., unsatisfiable type loop detected). + */ +static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr) +{ + /* + * Order state is used to detect strong link cycles, but only for BTF + * kinds that are or could be an independent definition (i.e., + * stand-alone fwd decl, enum, typedef, struct, union). Ptrs, arrays, + * func_protos, modifiers are just means to get to these definitions. + * Int/void don't need definitions, they are assumed to be always + * properly defined. We also ignore datasec, var, and funcs for now. + * So for all non-defining kinds, we never even set ordering state, + * for defining kinds we set ORDERING and subsequently ORDERED if it + * forms a strong link. + */ + struct btf_dump_type_aux_state *tstate = &d->type_states[id]; + const struct btf_type *t; + __u16 vlen; + int err, i; + + /* return true, letting typedefs know that it's ok to be emitted */ + if (tstate->order_state == ORDERED) + return 1; + + t = btf__type_by_id(d->btf, id); + + if (tstate->order_state == ORDERING) { + /* type loop, but resolvable through fwd declaration */ + if (btf_is_composite(t) && through_ptr && t->name_off != 0) + return 0; + pr_warn("unsatisfiable type cycle, id:[%u]\n", id); + return -ELOOP; + } + + switch (btf_kind(t)) { + case BTF_KIND_INT: + tstate->order_state = ORDERED; + return 0; + + case BTF_KIND_PTR: + err = btf_dump_order_type(d, t->type, true); + tstate->order_state = ORDERED; + return err; + + case BTF_KIND_ARRAY: + return btf_dump_order_type(d, btf_array(t)->type, through_ptr); + + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + const struct btf_member *m = btf_members(t); + /* + * struct/union is part of strong link, only if it's embedded + * (so no ptr in a path) or it's anonymous (so has to be + * defined inline, even if declared through ptr) + */ + if (through_ptr && t->name_off != 0) + return 0; + + tstate->order_state = ORDERING; + + vlen = btf_vlen(t); + for (i = 0; i < vlen; i++, m++) { + err = btf_dump_order_type(d, m->type, false); + if (err < 0) + return err; + } + + if (t->name_off != 0) { + err = btf_dump_add_emit_queue_id(d, id); + if (err < 0) + return err; + } + + tstate->order_state = ORDERED; + return 1; + } + case BTF_KIND_ENUM: + case BTF_KIND_FWD: + /* + * non-anonymous or non-referenced enums are top-level + * declarations and should be emitted. Same logic can be + * applied to FWDs, it won't hurt anyways. + */ + if (t->name_off != 0 || !tstate->referenced) { + err = btf_dump_add_emit_queue_id(d, id); + if (err) + return err; + } + tstate->order_state = ORDERED; + return 1; + + case BTF_KIND_TYPEDEF: { + int is_strong; + + is_strong = btf_dump_order_type(d, t->type, through_ptr); + if (is_strong < 0) + return is_strong; + + /* typedef is similar to struct/union w.r.t. fwd-decls */ + if (through_ptr && !is_strong) + return 0; + + /* typedef is always a named definition */ + err = btf_dump_add_emit_queue_id(d, id); + if (err) + return err; + + d->type_states[id].order_state = ORDERED; + return 1; + } + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + return btf_dump_order_type(d, t->type, through_ptr); + + case BTF_KIND_FUNC_PROTO: { + const struct btf_param *p = btf_params(t); + bool is_strong; + + err = btf_dump_order_type(d, t->type, through_ptr); + if (err < 0) + return err; + is_strong = err > 0; + + vlen = btf_vlen(t); + for (i = 0; i < vlen; i++, p++) { + err = btf_dump_order_type(d, p->type, through_ptr); + if (err < 0) + return err; + if (err > 0) + is_strong = true; + } + return is_strong; + } + case BTF_KIND_FUNC: + case BTF_KIND_VAR: + case BTF_KIND_DATASEC: + d->type_states[id].order_state = ORDERED; + return 0; + + default: + return -EINVAL; + } +} + +static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id, + const struct btf_type *t); +static void btf_dump_emit_struct_def(struct btf_dump *d, __u32 id, + const struct btf_type *t, int lvl); + +static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id, + const struct btf_type *t); +static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id, + const struct btf_type *t, int lvl); + +static void btf_dump_emit_fwd_def(struct btf_dump *d, __u32 id, + const struct btf_type *t); + +static void btf_dump_emit_typedef_def(struct btf_dump *d, __u32 id, + const struct btf_type *t, int lvl); + +/* a local view into a shared stack */ +struct id_stack { + const __u32 *ids; + int cnt; +}; + +static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id, + const char *fname, int lvl); +static void btf_dump_emit_type_chain(struct btf_dump *d, + struct id_stack *decl_stack, + const char *fname, int lvl); + +static const char *btf_dump_type_name(struct btf_dump *d, __u32 id); +static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id); +static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map, + const char *orig_name); + +static bool btf_dump_is_blacklisted(struct btf_dump *d, __u32 id) +{ + const struct btf_type *t = btf__type_by_id(d->btf, id); + + /* __builtin_va_list is a compiler built-in, which causes compilation + * errors, when compiling w/ different compiler, then used to compile + * original code (e.g., GCC to compile kernel, Clang to use generated + * C header from BTF). As it is built-in, it should be already defined + * properly internally in compiler. + */ + if (t->name_off == 0) + return false; + return strcmp(btf_name_of(d, t->name_off), "__builtin_va_list") == 0; +} + +/* + * Emit C-syntax definitions of types from chains of BTF types. + * + * High-level handling of determining necessary forward declarations are handled + * by btf_dump_emit_type() itself, but all nitty-gritty details of emitting type + * declarations/definitions in C syntax are handled by a combo of + * btf_dump_emit_type_decl()/btf_dump_emit_type_chain() w/ delegation to + * corresponding btf_dump_emit_*_{def,fwd}() functions. + * + * We also keep track of "containing struct/union type ID" to determine when + * we reference it from inside and thus can avoid emitting unnecessary forward + * declaration. + * + * This algorithm is designed in such a way, that even if some error occurs + * (either technical, e.g., out of memory, or logical, i.e., malformed BTF + * that doesn't comply to C rules completely), algorithm will try to proceed + * and produce as much meaningful output as possible. + */ +static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id) +{ + struct btf_dump_type_aux_state *tstate = &d->type_states[id]; + bool top_level_def = cont_id == 0; + const struct btf_type *t; + __u16 kind; + + if (tstate->emit_state == EMITTED) + return; + + t = btf__type_by_id(d->btf, id); + kind = btf_kind(t); + + if (tstate->emit_state == EMITTING) { + if (tstate->fwd_emitted) + return; + + switch (kind) { + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + /* + * if we are referencing a struct/union that we are + * part of - then no need for fwd declaration + */ + if (id == cont_id) + return; + if (t->name_off == 0) { + pr_warn("anonymous struct/union loop, id:[%u]\n", + id); + return; + } + btf_dump_emit_struct_fwd(d, id, t); + btf_dump_printf(d, ";\n\n"); + tstate->fwd_emitted = 1; + break; + case BTF_KIND_TYPEDEF: + /* + * for typedef fwd_emitted means typedef definition + * was emitted, but it can be used only for "weak" + * references through pointer only, not for embedding + */ + if (!btf_dump_is_blacklisted(d, id)) { + btf_dump_emit_typedef_def(d, id, t, 0); + btf_dump_printf(d, ";\n\n"); + }; + tstate->fwd_emitted = 1; + break; + default: + break; + } + + return; + } + + switch (kind) { + case BTF_KIND_INT: + tstate->emit_state = EMITTED; + break; + case BTF_KIND_ENUM: + if (top_level_def) { + btf_dump_emit_enum_def(d, id, t, 0); + btf_dump_printf(d, ";\n\n"); + } + tstate->emit_state = EMITTED; + break; + case BTF_KIND_PTR: + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + btf_dump_emit_type(d, t->type, cont_id); + break; + case BTF_KIND_ARRAY: + btf_dump_emit_type(d, btf_array(t)->type, cont_id); + break; + case BTF_KIND_FWD: + btf_dump_emit_fwd_def(d, id, t); + btf_dump_printf(d, ";\n\n"); + tstate->emit_state = EMITTED; + break; + case BTF_KIND_TYPEDEF: + tstate->emit_state = EMITTING; + btf_dump_emit_type(d, t->type, id); + /* + * typedef can server as both definition and forward + * declaration; at this stage someone depends on + * typedef as a forward declaration (refers to it + * through pointer), so unless we already did it, + * emit typedef as a forward declaration + */ + if (!tstate->fwd_emitted && !btf_dump_is_blacklisted(d, id)) { + btf_dump_emit_typedef_def(d, id, t, 0); + btf_dump_printf(d, ";\n\n"); + } + tstate->emit_state = EMITTED; + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + tstate->emit_state = EMITTING; + /* if it's a top-level struct/union definition or struct/union + * is anonymous, then in C we'll be emitting all fields and + * their types (as opposed to just `struct X`), so we need to + * make sure that all types, referenced from struct/union + * members have necessary forward-declarations, where + * applicable + */ + if (top_level_def || t->name_off == 0) { + const struct btf_member *m = btf_members(t); + __u16 vlen = btf_vlen(t); + int i, new_cont_id; + + new_cont_id = t->name_off == 0 ? cont_id : id; + for (i = 0; i < vlen; i++, m++) + btf_dump_emit_type(d, m->type, new_cont_id); + } else if (!tstate->fwd_emitted && id != cont_id) { + btf_dump_emit_struct_fwd(d, id, t); + btf_dump_printf(d, ";\n\n"); + tstate->fwd_emitted = 1; + } + + if (top_level_def) { + btf_dump_emit_struct_def(d, id, t, 0); + btf_dump_printf(d, ";\n\n"); + tstate->emit_state = EMITTED; + } else { + tstate->emit_state = NOT_EMITTED; + } + break; + case BTF_KIND_FUNC_PROTO: { + const struct btf_param *p = btf_params(t); + __u16 vlen = btf_vlen(t); + int i; + + btf_dump_emit_type(d, t->type, cont_id); + for (i = 0; i < vlen; i++, p++) + btf_dump_emit_type(d, p->type, cont_id); + + break; + } + default: + break; + } +} + +static int btf_align_of(const struct btf *btf, __u32 id) +{ + const struct btf_type *t = btf__type_by_id(btf, id); + __u16 kind = btf_kind(t); + + switch (kind) { + case BTF_KIND_INT: + case BTF_KIND_ENUM: + return min(sizeof(void *), t->size); + case BTF_KIND_PTR: + return sizeof(void *); + case BTF_KIND_TYPEDEF: + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + return btf_align_of(btf, t->type); + case BTF_KIND_ARRAY: + return btf_align_of(btf, btf_array(t)->type); + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + const struct btf_member *m = btf_members(t); + __u16 vlen = btf_vlen(t); + int i, align = 1; + + for (i = 0; i < vlen; i++, m++) + align = max(align, btf_align_of(btf, m->type)); + + return align; + } + default: + pr_warn("unsupported BTF_KIND:%u\n", btf_kind(t)); + return 1; + } +} + +static bool btf_is_struct_packed(const struct btf *btf, __u32 id, + const struct btf_type *t) +{ + const struct btf_member *m; + int align, i, bit_sz; + __u16 vlen; + + align = btf_align_of(btf, id); + /* size of a non-packed struct has to be a multiple of its alignment*/ + if (t->size % align) + return true; + + m = btf_members(t); + vlen = btf_vlen(t); + /* all non-bitfield fields have to be naturally aligned */ + for (i = 0; i < vlen; i++, m++) { + align = btf_align_of(btf, m->type); + bit_sz = btf_member_bitfield_size(t, i); + if (bit_sz == 0 && m->offset % (8 * align) != 0) + return true; + } + + /* + * if original struct was marked as packed, but its layout is + * naturally aligned, we'll detect that it's not packed + */ + return false; +} + +static int chip_away_bits(int total, int at_most) +{ + return total % at_most ? : at_most; +} + +static void btf_dump_emit_bit_padding(const struct btf_dump *d, + int cur_off, int m_off, int m_bit_sz, + int align, int lvl) +{ + int off_diff = m_off - cur_off; + int ptr_bits = sizeof(void *) * 8; + + if (off_diff <= 0) + /* no gap */ + return; + if (m_bit_sz == 0 && off_diff < align * 8) + /* natural padding will take care of a gap */ + return; + + while (off_diff > 0) { + const char *pad_type; + int pad_bits; + + if (ptr_bits > 32 && off_diff > 32) { + pad_type = "long"; + pad_bits = chip_away_bits(off_diff, ptr_bits); + } else if (off_diff > 16) { + pad_type = "int"; + pad_bits = chip_away_bits(off_diff, 32); + } else if (off_diff > 8) { + pad_type = "short"; + pad_bits = chip_away_bits(off_diff, 16); + } else { + pad_type = "char"; + pad_bits = chip_away_bits(off_diff, 8); + } + btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, pad_bits); + off_diff -= pad_bits; + } +} + +static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id, + const struct btf_type *t) +{ + btf_dump_printf(d, "%s %s", + btf_is_struct(t) ? "struct" : "union", + btf_dump_type_name(d, id)); +} + +static void btf_dump_emit_struct_def(struct btf_dump *d, + __u32 id, + const struct btf_type *t, + int lvl) +{ + const struct btf_member *m = btf_members(t); + bool is_struct = btf_is_struct(t); + int align, i, packed, off = 0; + __u16 vlen = btf_vlen(t); + + packed = is_struct ? btf_is_struct_packed(d->btf, id, t) : 0; + + btf_dump_printf(d, "%s%s%s {", + is_struct ? "struct" : "union", + t->name_off ? " " : "", + btf_dump_type_name(d, id)); + + for (i = 0; i < vlen; i++, m++) { + const char *fname; + int m_off, m_sz; + + fname = btf_name_of(d, m->name_off); + m_sz = btf_member_bitfield_size(t, i); + m_off = btf_member_bit_offset(t, i); + align = packed ? 1 : btf_align_of(d->btf, m->type); + + btf_dump_emit_bit_padding(d, off, m_off, m_sz, align, lvl + 1); + btf_dump_printf(d, "\n%s", pfx(lvl + 1)); + btf_dump_emit_type_decl(d, m->type, fname, lvl + 1); + + if (m_sz) { + btf_dump_printf(d, ": %d", m_sz); + off = m_off + m_sz; + } else { + m_sz = max(0, btf__resolve_size(d->btf, m->type)); + off = m_off + m_sz * 8; + } + btf_dump_printf(d, ";"); + } + + /* pad at the end, if necessary */ + if (is_struct) { + align = packed ? 1 : btf_align_of(d->btf, id); + btf_dump_emit_bit_padding(d, off, t->size * 8, 0, align, + lvl + 1); + } + + if (vlen) + btf_dump_printf(d, "\n"); + btf_dump_printf(d, "%s}", pfx(lvl)); + if (packed) + btf_dump_printf(d, " __attribute__((packed))"); +} + +static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id, + const struct btf_type *t) +{ + btf_dump_printf(d, "enum %s", btf_dump_type_name(d, id)); +} + +static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id, + const struct btf_type *t, + int lvl) +{ + const struct btf_enum *v = btf_enum(t); + __u16 vlen = btf_vlen(t); + const char *name; + size_t dup_cnt; + int i; + + btf_dump_printf(d, "enum%s%s", + t->name_off ? " " : "", + btf_dump_type_name(d, id)); + + if (vlen) { + btf_dump_printf(d, " {"); + for (i = 0; i < vlen; i++, v++) { + name = btf_name_of(d, v->name_off); + /* enumerators share namespace with typedef idents */ + dup_cnt = btf_dump_name_dups(d, d->ident_names, name); + if (dup_cnt > 1) { + btf_dump_printf(d, "\n%s%s___%zu = %d,", + pfx(lvl + 1), name, dup_cnt, + (__s32)v->val); + } else { + btf_dump_printf(d, "\n%s%s = %d,", + pfx(lvl + 1), name, + (__s32)v->val); + } + } + btf_dump_printf(d, "\n%s}", pfx(lvl)); + } +} + +static void btf_dump_emit_fwd_def(struct btf_dump *d, __u32 id, + const struct btf_type *t) +{ + const char *name = btf_dump_type_name(d, id); + + if (btf_kflag(t)) + btf_dump_printf(d, "union %s", name); + else + btf_dump_printf(d, "struct %s", name); +} + +static void btf_dump_emit_typedef_def(struct btf_dump *d, __u32 id, + const struct btf_type *t, int lvl) +{ + const char *name = btf_dump_ident_name(d, id); + + /* + * Old GCC versions are emitting invalid typedef for __gnuc_va_list + * pointing to VOID. This generates warnings from btf_dump() and + * results in uncompilable header file, so we are fixing it up here + * with valid typedef into __builtin_va_list. + */ + if (t->type == 0 && strcmp(name, "__gnuc_va_list") == 0) { + btf_dump_printf(d, "typedef __builtin_va_list __gnuc_va_list"); + return; + } + + btf_dump_printf(d, "typedef "); + btf_dump_emit_type_decl(d, t->type, name, lvl); +} + +static int btf_dump_push_decl_stack_id(struct btf_dump *d, __u32 id) +{ + __u32 *new_stack; + size_t new_cap; + + if (d->decl_stack_cnt >= d->decl_stack_cap) { + new_cap = max(16, d->decl_stack_cap * 3 / 2); + new_stack = realloc(d->decl_stack, + new_cap * sizeof(new_stack[0])); + if (!new_stack) + return -ENOMEM; + d->decl_stack = new_stack; + d->decl_stack_cap = new_cap; + } + + d->decl_stack[d->decl_stack_cnt++] = id; + + return 0; +} + +/* + * Emit type declaration (e.g., field type declaration in a struct or argument + * declaration in function prototype) in correct C syntax. + * + * For most types it's trivial, but there are few quirky type declaration + * cases worth mentioning: + * - function prototypes (especially nesting of function prototypes); + * - arrays; + * - const/volatile/restrict for pointers vs other types. + * + * For a good discussion of *PARSING* C syntax (as a human), see + * Peter van der Linden's "Expert C Programming: Deep C Secrets", + * Ch.3 "Unscrambling Declarations in C". + * + * It won't help with BTF to C conversion much, though, as it's an opposite + * problem. So we came up with this algorithm in reverse to van der Linden's + * parsing algorithm. It goes from structured BTF representation of type + * declaration to a valid compilable C syntax. + * + * For instance, consider this C typedef: + * typedef const int * const * arr[10] arr_t; + * It will be represented in BTF with this chain of BTF types: + * [typedef] -> [array] -> [ptr] -> [const] -> [ptr] -> [const] -> [int] + * + * Notice how [const] modifier always goes before type it modifies in BTF type + * graph, but in C syntax, const/volatile/restrict modifiers are written to + * the right of pointers, but to the left of other types. There are also other + * quirks, like function pointers, arrays of them, functions returning other + * functions, etc. + * + * We handle that by pushing all the types to a stack, until we hit "terminal" + * type (int/enum/struct/union/fwd). Then depending on the kind of a type on + * top of a stack, modifiers are handled differently. Array/function pointers + * have also wildly different syntax and how nesting of them are done. See + * code for authoritative definition. + * + * To avoid allocating new stack for each independent chain of BTF types, we + * share one bigger stack, with each chain working only on its own local view + * of a stack frame. Some care is required to "pop" stack frames after + * processing type declaration chain. + */ +static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id, + const char *fname, int lvl) +{ + struct id_stack decl_stack; + const struct btf_type *t; + int err, stack_start; + + stack_start = d->decl_stack_cnt; + for (;;) { + err = btf_dump_push_decl_stack_id(d, id); + if (err < 0) { + /* + * if we don't have enough memory for entire type decl + * chain, restore stack, emit warning, and try to + * proceed nevertheless + */ + pr_warn("not enough memory for decl stack:%d", err); + d->decl_stack_cnt = stack_start; + return; + } + + /* VOID */ + if (id == 0) + break; + + t = btf__type_by_id(d->btf, id); + switch (btf_kind(t)) { + case BTF_KIND_PTR: + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + case BTF_KIND_FUNC_PROTO: + id = t->type; + break; + case BTF_KIND_ARRAY: + id = btf_array(t)->type; + break; + case BTF_KIND_INT: + case BTF_KIND_ENUM: + case BTF_KIND_FWD: + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_TYPEDEF: + goto done; + default: + pr_warn("unexpected type in decl chain, kind:%u, id:[%u]\n", + btf_kind(t), id); + goto done; + } + } +done: + /* + * We might be inside a chain of declarations (e.g., array of function + * pointers returning anonymous (so inlined) structs, having another + * array field). Each of those needs its own "stack frame" to handle + * emitting of declarations. Those stack frames are non-overlapping + * portions of shared btf_dump->decl_stack. To make it a bit nicer to + * handle this set of nested stacks, we create a view corresponding to + * our own "stack frame" and work with it as an independent stack. + * We'll need to clean up after emit_type_chain() returns, though. + */ + decl_stack.ids = d->decl_stack + stack_start; + decl_stack.cnt = d->decl_stack_cnt - stack_start; + btf_dump_emit_type_chain(d, &decl_stack, fname, lvl); + /* + * emit_type_chain() guarantees that it will pop its entire decl_stack + * frame before returning. But it works with a read-only view into + * decl_stack, so it doesn't actually pop anything from the + * perspective of shared btf_dump->decl_stack, per se. We need to + * reset decl_stack state to how it was before us to avoid it growing + * all the time. + */ + d->decl_stack_cnt = stack_start; +} + +static void btf_dump_emit_mods(struct btf_dump *d, struct id_stack *decl_stack) +{ + const struct btf_type *t; + __u32 id; + + while (decl_stack->cnt) { + id = decl_stack->ids[decl_stack->cnt - 1]; + t = btf__type_by_id(d->btf, id); + + switch (btf_kind(t)) { + case BTF_KIND_VOLATILE: + btf_dump_printf(d, "volatile "); + break; + case BTF_KIND_CONST: + btf_dump_printf(d, "const "); + break; + case BTF_KIND_RESTRICT: + btf_dump_printf(d, "restrict "); + break; + default: + return; + } + decl_stack->cnt--; + } +} + +static void btf_dump_emit_name(const struct btf_dump *d, + const char *name, bool last_was_ptr) +{ + bool separate = name[0] && !last_was_ptr; + + btf_dump_printf(d, "%s%s", separate ? " " : "", name); +} + +static void btf_dump_emit_type_chain(struct btf_dump *d, + struct id_stack *decls, + const char *fname, int lvl) +{ + /* + * last_was_ptr is used to determine if we need to separate pointer + * asterisk (*) from previous part of type signature with space, so + * that we get `int ***`, instead of `int * * *`. We default to true + * for cases where we have single pointer in a chain. E.g., in ptr -> + * func_proto case. func_proto will start a new emit_type_chain call + * with just ptr, which should be emitted as (*) or (*<fname>), so we + * don't want to prepend space for that last pointer. + */ + bool last_was_ptr = true; + const struct btf_type *t; + const char *name; + __u16 kind; + __u32 id; + + while (decls->cnt) { + id = decls->ids[--decls->cnt]; + if (id == 0) { + /* VOID is a special snowflake */ + btf_dump_emit_mods(d, decls); + btf_dump_printf(d, "void"); + last_was_ptr = false; + continue; + } + + t = btf__type_by_id(d->btf, id); + kind = btf_kind(t); + + switch (kind) { + case BTF_KIND_INT: + btf_dump_emit_mods(d, decls); + name = btf_name_of(d, t->name_off); + btf_dump_printf(d, "%s", name); + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + btf_dump_emit_mods(d, decls); + /* inline anonymous struct/union */ + if (t->name_off == 0) + btf_dump_emit_struct_def(d, id, t, lvl); + else + btf_dump_emit_struct_fwd(d, id, t); + break; + case BTF_KIND_ENUM: + btf_dump_emit_mods(d, decls); + /* inline anonymous enum */ + if (t->name_off == 0) + btf_dump_emit_enum_def(d, id, t, lvl); + else + btf_dump_emit_enum_fwd(d, id, t); + break; + case BTF_KIND_FWD: + btf_dump_emit_mods(d, decls); + btf_dump_emit_fwd_def(d, id, t); + break; + case BTF_KIND_TYPEDEF: + btf_dump_emit_mods(d, decls); + btf_dump_printf(d, "%s", btf_dump_ident_name(d, id)); + break; + case BTF_KIND_PTR: + btf_dump_printf(d, "%s", last_was_ptr ? "*" : " *"); + break; + case BTF_KIND_VOLATILE: + btf_dump_printf(d, " volatile"); + break; + case BTF_KIND_CONST: + btf_dump_printf(d, " const"); + break; + case BTF_KIND_RESTRICT: + btf_dump_printf(d, " restrict"); + break; + case BTF_KIND_ARRAY: { + const struct btf_array *a = btf_array(t); + const struct btf_type *next_t; + __u32 next_id; + bool multidim; + /* + * GCC has a bug + * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=8354) + * which causes it to emit extra const/volatile + * modifiers for an array, if array's element type has + * const/volatile modifiers. Clang doesn't do that. + * In general, it doesn't seem very meaningful to have + * a const/volatile modifier for array, so we are + * going to silently skip them here. + */ + while (decls->cnt) { + next_id = decls->ids[decls->cnt - 1]; + next_t = btf__type_by_id(d->btf, next_id); + if (btf_is_mod(next_t)) + decls->cnt--; + else + break; + } + + if (decls->cnt == 0) { + btf_dump_emit_name(d, fname, last_was_ptr); + btf_dump_printf(d, "[%u]", a->nelems); + return; + } + + next_id = decls->ids[decls->cnt - 1]; + next_t = btf__type_by_id(d->btf, next_id); + multidim = btf_is_array(next_t); + /* we need space if we have named non-pointer */ + if (fname[0] && !last_was_ptr) + btf_dump_printf(d, " "); + /* no parentheses for multi-dimensional array */ + if (!multidim) + btf_dump_printf(d, "("); + btf_dump_emit_type_chain(d, decls, fname, lvl); + if (!multidim) + btf_dump_printf(d, ")"); + btf_dump_printf(d, "[%u]", a->nelems); + return; + } + case BTF_KIND_FUNC_PROTO: { + const struct btf_param *p = btf_params(t); + __u16 vlen = btf_vlen(t); + int i; + + btf_dump_emit_mods(d, decls); + if (decls->cnt) { + btf_dump_printf(d, " ("); + btf_dump_emit_type_chain(d, decls, fname, lvl); + btf_dump_printf(d, ")"); + } else { + btf_dump_emit_name(d, fname, last_was_ptr); + } + btf_dump_printf(d, "("); + /* + * Clang for BPF target generates func_proto with no + * args as a func_proto with a single void arg (e.g., + * `int (*f)(void)` vs just `int (*f)()`). We are + * going to pretend there are no args for such case. + */ + if (vlen == 1 && p->type == 0) { + btf_dump_printf(d, ")"); + return; + } + + for (i = 0; i < vlen; i++, p++) { + if (i > 0) + btf_dump_printf(d, ", "); + + /* last arg of type void is vararg */ + if (i == vlen - 1 && p->type == 0) { + btf_dump_printf(d, "..."); + break; + } + + name = btf_name_of(d, p->name_off); + btf_dump_emit_type_decl(d, p->type, name, lvl); + } + + btf_dump_printf(d, ")"); + return; + } + default: + pr_warn("unexpected type in decl chain, kind:%u, id:[%u]\n", + kind, id); + return; + } + + last_was_ptr = kind == BTF_KIND_PTR; + } + + btf_dump_emit_name(d, fname, last_was_ptr); +} + +/* return number of duplicates (occurrences) of a given name */ +static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map, + const char *orig_name) +{ + size_t dup_cnt = 0; + + hashmap__find(name_map, orig_name, (void **)&dup_cnt); + dup_cnt++; + hashmap__set(name_map, orig_name, (void *)dup_cnt, NULL, NULL); + + return dup_cnt; +} + +static const char *btf_dump_resolve_name(struct btf_dump *d, __u32 id, + struct hashmap *name_map) +{ + struct btf_dump_type_aux_state *s = &d->type_states[id]; + const struct btf_type *t = btf__type_by_id(d->btf, id); + const char *orig_name = btf_name_of(d, t->name_off); + const char **cached_name = &d->cached_names[id]; + size_t dup_cnt; + + if (t->name_off == 0) + return ""; + + if (s->name_resolved) + return *cached_name ? *cached_name : orig_name; + + dup_cnt = btf_dump_name_dups(d, name_map, orig_name); + if (dup_cnt > 1) { + const size_t max_len = 256; + char new_name[max_len]; + + snprintf(new_name, max_len, "%s___%zu", orig_name, dup_cnt); + *cached_name = strdup(new_name); + } + + s->name_resolved = 1; + return *cached_name ? *cached_name : orig_name; +} + +static const char *btf_dump_type_name(struct btf_dump *d, __u32 id) +{ + return btf_dump_resolve_name(d, id, d->type_names); +} + +static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id) +{ + return btf_dump_resolve_name(d, id, d->ident_names); +} diff --git a/src/contrib/libbpf/bpf/hashmap.c b/src/contrib/libbpf/bpf/hashmap.c new file mode 100644 index 0000000..6122272 --- /dev/null +++ b/src/contrib/libbpf/bpf/hashmap.c @@ -0,0 +1,229 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +/* + * Generic non-thread safe hash map implementation. + * + * Copyright (c) 2019 Facebook + */ +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <linux/err.h> +#include "hashmap.h" + +/* start with 4 buckets */ +#define HASHMAP_MIN_CAP_BITS 2 + +static void hashmap_add_entry(struct hashmap_entry **pprev, + struct hashmap_entry *entry) +{ + entry->next = *pprev; + *pprev = entry; +} + +static void hashmap_del_entry(struct hashmap_entry **pprev, + struct hashmap_entry *entry) +{ + *pprev = entry->next; + entry->next = NULL; +} + +void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn, + hashmap_equal_fn equal_fn, void *ctx) +{ + map->hash_fn = hash_fn; + map->equal_fn = equal_fn; + map->ctx = ctx; + + map->buckets = NULL; + map->cap = 0; + map->cap_bits = 0; + map->sz = 0; +} + +struct hashmap *hashmap__new(hashmap_hash_fn hash_fn, + hashmap_equal_fn equal_fn, + void *ctx) +{ + struct hashmap *map = malloc(sizeof(struct hashmap)); + + if (!map) + return ERR_PTR(-ENOMEM); + hashmap__init(map, hash_fn, equal_fn, ctx); + return map; +} + +void hashmap__clear(struct hashmap *map) +{ + free(map->buckets); + map->cap = map->cap_bits = map->sz = 0; +} + +void hashmap__free(struct hashmap *map) +{ + if (!map) + return; + + hashmap__clear(map); + free(map); +} + +size_t hashmap__size(const struct hashmap *map) +{ + return map->sz; +} + +size_t hashmap__capacity(const struct hashmap *map) +{ + return map->cap; +} + +static bool hashmap_needs_to_grow(struct hashmap *map) +{ + /* grow if empty or more than 75% filled */ + return (map->cap == 0) || ((map->sz + 1) * 4 / 3 > map->cap); +} + +static int hashmap_grow(struct hashmap *map) +{ + struct hashmap_entry **new_buckets; + struct hashmap_entry *cur, *tmp; + size_t new_cap_bits, new_cap; + size_t h; + int bkt; + + new_cap_bits = map->cap_bits + 1; + if (new_cap_bits < HASHMAP_MIN_CAP_BITS) + new_cap_bits = HASHMAP_MIN_CAP_BITS; + + new_cap = 1UL << new_cap_bits; + new_buckets = calloc(new_cap, sizeof(new_buckets[0])); + if (!new_buckets) + return -ENOMEM; + + hashmap__for_each_entry_safe(map, cur, tmp, bkt) { + h = hash_bits(map->hash_fn(cur->key, map->ctx), new_cap_bits); + hashmap_add_entry(&new_buckets[h], cur); + } + + map->cap = new_cap; + map->cap_bits = new_cap_bits; + free(map->buckets); + map->buckets = new_buckets; + + return 0; +} + +static bool hashmap_find_entry(const struct hashmap *map, + const void *key, size_t hash, + struct hashmap_entry ***pprev, + struct hashmap_entry **entry) +{ + struct hashmap_entry *cur, **prev_ptr; + + if (!map->buckets) + return false; + + for (prev_ptr = &map->buckets[hash], cur = *prev_ptr; + cur; + prev_ptr = &cur->next, cur = cur->next) { + if (map->equal_fn(cur->key, key, map->ctx)) { + if (pprev) + *pprev = prev_ptr; + *entry = cur; + return true; + } + } + + return false; +} + +int hashmap__insert(struct hashmap *map, const void *key, void *value, + enum hashmap_insert_strategy strategy, + const void **old_key, void **old_value) +{ + struct hashmap_entry *entry; + size_t h; + int err; + + if (old_key) + *old_key = NULL; + if (old_value) + *old_value = NULL; + + h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits); + if (strategy != HASHMAP_APPEND && + hashmap_find_entry(map, key, h, NULL, &entry)) { + if (old_key) + *old_key = entry->key; + if (old_value) + *old_value = entry->value; + + if (strategy == HASHMAP_SET || strategy == HASHMAP_UPDATE) { + entry->key = key; + entry->value = value; + return 0; + } else if (strategy == HASHMAP_ADD) { + return -EEXIST; + } + } + + if (strategy == HASHMAP_UPDATE) + return -ENOENT; + + if (hashmap_needs_to_grow(map)) { + err = hashmap_grow(map); + if (err) + return err; + h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits); + } + + entry = malloc(sizeof(struct hashmap_entry)); + if (!entry) + return -ENOMEM; + + entry->key = key; + entry->value = value; + hashmap_add_entry(&map->buckets[h], entry); + map->sz++; + + return 0; +} + +bool hashmap__find(const struct hashmap *map, const void *key, void **value) +{ + struct hashmap_entry *entry; + size_t h; + + h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits); + if (!hashmap_find_entry(map, key, h, NULL, &entry)) + return false; + + if (value) + *value = entry->value; + return true; +} + +bool hashmap__delete(struct hashmap *map, const void *key, + const void **old_key, void **old_value) +{ + struct hashmap_entry **pprev, *entry; + size_t h; + + h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits); + if (!hashmap_find_entry(map, key, h, &pprev, &entry)) + return false; + + if (old_key) + *old_key = entry->key; + if (old_value) + *old_value = entry->value; + + hashmap_del_entry(pprev, entry); + free(entry); + map->sz--; + + return true; +} + diff --git a/src/contrib/libbpf/bpf/hashmap.h b/src/contrib/libbpf/bpf/hashmap.h new file mode 100644 index 0000000..bae8879 --- /dev/null +++ b/src/contrib/libbpf/bpf/hashmap.h @@ -0,0 +1,178 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +/* + * Generic non-thread safe hash map implementation. + * + * Copyright (c) 2019 Facebook + */ +#ifndef __LIBBPF_HASHMAP_H +#define __LIBBPF_HASHMAP_H + +#include <stdbool.h> +#include <stddef.h> +#ifdef __GLIBC__ +#include <bits/wordsize.h> +#else +#include <bits/reg.h> +#endif +#include "libbpf_internal.h" + +static inline size_t hash_bits(size_t h, int bits) +{ + /* shuffle bits and return requested number of upper bits */ + return (h * 11400714819323198485llu) >> (__WORDSIZE - bits); +} + +typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx); +typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx); + +struct hashmap_entry { + const void *key; + void *value; + struct hashmap_entry *next; +}; + +struct hashmap { + hashmap_hash_fn hash_fn; + hashmap_equal_fn equal_fn; + void *ctx; + + struct hashmap_entry **buckets; + size_t cap; + size_t cap_bits; + size_t sz; +}; + +#define HASHMAP_INIT(hash_fn, equal_fn, ctx) { \ + .hash_fn = (hash_fn), \ + .equal_fn = (equal_fn), \ + .ctx = (ctx), \ + .buckets = NULL, \ + .cap = 0, \ + .cap_bits = 0, \ + .sz = 0, \ +} + +void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn, + hashmap_equal_fn equal_fn, void *ctx); +struct hashmap *hashmap__new(hashmap_hash_fn hash_fn, + hashmap_equal_fn equal_fn, + void *ctx); +void hashmap__clear(struct hashmap *map); +void hashmap__free(struct hashmap *map); + +size_t hashmap__size(const struct hashmap *map); +size_t hashmap__capacity(const struct hashmap *map); + +/* + * Hashmap insertion strategy: + * - HASHMAP_ADD - only add key/value if key doesn't exist yet; + * - HASHMAP_SET - add key/value pair if key doesn't exist yet; otherwise, + * update value; + * - HASHMAP_UPDATE - update value, if key already exists; otherwise, do + * nothing and return -ENOENT; + * - HASHMAP_APPEND - always add key/value pair, even if key already exists. + * This turns hashmap into a multimap by allowing multiple values to be + * associated with the same key. Most useful read API for such hashmap is + * hashmap__for_each_key_entry() iteration. If hashmap__find() is still + * used, it will return last inserted key/value entry (first in a bucket + * chain). + */ +enum hashmap_insert_strategy { + HASHMAP_ADD, + HASHMAP_SET, + HASHMAP_UPDATE, + HASHMAP_APPEND, +}; + +/* + * hashmap__insert() adds key/value entry w/ various semantics, depending on + * provided strategy value. If a given key/value pair replaced already + * existing key/value pair, both old key and old value will be returned + * through old_key and old_value to allow calling code do proper memory + * management. + */ +int hashmap__insert(struct hashmap *map, const void *key, void *value, + enum hashmap_insert_strategy strategy, + const void **old_key, void **old_value); + +static inline int hashmap__add(struct hashmap *map, + const void *key, void *value) +{ + return hashmap__insert(map, key, value, HASHMAP_ADD, NULL, NULL); +} + +static inline int hashmap__set(struct hashmap *map, + const void *key, void *value, + const void **old_key, void **old_value) +{ + return hashmap__insert(map, key, value, HASHMAP_SET, + old_key, old_value); +} + +static inline int hashmap__update(struct hashmap *map, + const void *key, void *value, + const void **old_key, void **old_value) +{ + return hashmap__insert(map, key, value, HASHMAP_UPDATE, + old_key, old_value); +} + +static inline int hashmap__append(struct hashmap *map, + const void *key, void *value) +{ + return hashmap__insert(map, key, value, HASHMAP_APPEND, NULL, NULL); +} + +bool hashmap__delete(struct hashmap *map, const void *key, + const void **old_key, void **old_value); + +bool hashmap__find(const struct hashmap *map, const void *key, void **value); + +/* + * hashmap__for_each_entry - iterate over all entries in hashmap + * @map: hashmap to iterate + * @cur: struct hashmap_entry * used as a loop cursor + * @bkt: integer used as a bucket loop cursor + */ +#define hashmap__for_each_entry(map, cur, bkt) \ + for (bkt = 0; bkt < map->cap; bkt++) \ + for (cur = map->buckets[bkt]; cur; cur = cur->next) + +/* + * hashmap__for_each_entry_safe - iterate over all entries in hashmap, safe + * against removals + * @map: hashmap to iterate + * @cur: struct hashmap_entry * used as a loop cursor + * @tmp: struct hashmap_entry * used as a temporary next cursor storage + * @bkt: integer used as a bucket loop cursor + */ +#define hashmap__for_each_entry_safe(map, cur, tmp, bkt) \ + for (bkt = 0; bkt < map->cap; bkt++) \ + for (cur = map->buckets[bkt]; \ + cur && ({tmp = cur->next; true; }); \ + cur = tmp) + +/* + * hashmap__for_each_key_entry - iterate over entries associated with given key + * @map: hashmap to iterate + * @cur: struct hashmap_entry * used as a loop cursor + * @key: key to iterate entries for + */ +#define hashmap__for_each_key_entry(map, cur, _key) \ + for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\ + map->cap_bits); \ + map->buckets ? map->buckets[bkt] : NULL; }); \ + cur; \ + cur = cur->next) \ + if (map->equal_fn(cur->key, (_key), map->ctx)) + +#define hashmap__for_each_key_entry_safe(map, cur, tmp, _key) \ + for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\ + map->cap_bits); \ + cur = map->buckets ? map->buckets[bkt] : NULL; }); \ + cur && ({ tmp = cur->next; true; }); \ + cur = tmp) \ + if (map->equal_fn(cur->key, (_key), map->ctx)) + +#endif /* __LIBBPF_HASHMAP_H */ diff --git a/src/contrib/libbpf/bpf/libbpf.c b/src/contrib/libbpf/bpf/libbpf.c new file mode 100644 index 0000000..29d8d03 --- /dev/null +++ b/src/contrib/libbpf/bpf/libbpf.c @@ -0,0 +1,6581 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +/* + * Common eBPF ELF object loading operations. + * + * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org> + * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> + * Copyright (C) 2015 Huawei Inc. + * Copyright (C) 2017 Nicira, Inc. + * Copyright (C) 2019 Isovalent, Inc. + */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include <stdlib.h> +#include <stdio.h> +#include <stdarg.h> +#include <libgen.h> +#include <inttypes.h> +#include <string.h> +#include <unistd.h> +#include <endian.h> +#include <fcntl.h> +#include <errno.h> +#include <asm/unistd.h> +#include <linux/err.h> +#include <linux/kernel.h> +#include <linux/bpf.h> +#include <linux/btf.h> +#include <linux/filter.h> +#include <linux/list.h> +#include <linux/limits.h> +#include <linux/perf_event.h> +#include <linux/ring_buffer.h> +#include <linux/version.h> +#include <sys/epoll.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/vfs.h> +#include <sys/utsname.h> +#include <libelf.h> +#include <gelf.h> + +#include "libbpf.h" +#include "bpf.h" +#include "btf.h" +#include "str_error.h" +#include "libbpf_internal.h" +#include "hashmap.h" + +#ifndef EM_BPF +#define EM_BPF 247 +#endif + +#ifndef BPF_FS_MAGIC +#define BPF_FS_MAGIC 0xcafe4a11 +#endif + +/* vsprintf() in __base_pr() uses nonliteral format string. It may break + * compilation if user enables corresponding warning. Disable it explicitly. + */ +#pragma GCC diagnostic ignored "-Wformat-nonliteral" + +#define __printf(a, b) __attribute__((format(printf, a, b))) + +static int __base_pr(enum libbpf_print_level level, const char *format, + va_list args) +{ + if (level == LIBBPF_DEBUG) + return 0; + + return vfprintf(stderr, format, args); +} + +static libbpf_print_fn_t __libbpf_pr = __base_pr; + +libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn) +{ + libbpf_print_fn_t old_print_fn = __libbpf_pr; + + __libbpf_pr = fn; + return old_print_fn; +} + +__printf(2, 3) +void libbpf_print(enum libbpf_print_level level, const char *format, ...) +{ + va_list args; + + if (!__libbpf_pr) + return; + + va_start(args, format); + __libbpf_pr(level, format, args); + va_end(args); +} + +#define STRERR_BUFSIZE 128 + +#define CHECK_ERR(action, err, out) do { \ + err = action; \ + if (err) \ + goto out; \ +} while (0) + + +/* Copied from tools/perf/util/util.h */ +#ifndef zfree +# define zfree(ptr) ({ free(*ptr); *ptr = NULL; }) +#endif + +#ifndef zclose +# define zclose(fd) ({ \ + int ___err = 0; \ + if ((fd) >= 0) \ + ___err = close((fd)); \ + fd = -1; \ + ___err; }) +#endif + +#ifdef HAVE_LIBELF_MMAP_SUPPORT +# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ_MMAP +#else +# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ +#endif + +static inline __u64 ptr_to_u64(const void *ptr) +{ + return (__u64) (unsigned long) ptr; +} + +struct bpf_capabilities { + /* v4.14: kernel support for program & map names. */ + __u32 name:1; + /* v5.2: kernel support for global data sections. */ + __u32 global_data:1; + /* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */ + __u32 btf_func:1; + /* BTF_KIND_VAR and BTF_KIND_DATASEC support */ + __u32 btf_datasec:1; + /* BPF_F_MMAPABLE is supported for arrays */ + __u32 array_mmap:1; +}; + +/* + * bpf_prog should be a better name but it has been used in + * linux/filter.h. + */ +struct bpf_program { + /* Index in elf obj file, for relocation use. */ + int idx; + char *name; + int prog_ifindex; + char *section_name; + /* section_name with / replaced by _; makes recursive pinning + * in bpf_object__pin_programs easier + */ + char *pin_name; + struct bpf_insn *insns; + size_t insns_cnt, main_prog_cnt; + enum bpf_prog_type type; + + struct reloc_desc { + enum { + RELO_LD64, + RELO_CALL, + RELO_DATA, + } type; + int insn_idx; + int map_idx; + int sym_off; + } *reloc_desc; + int nr_reloc; + int log_level; + + struct { + int nr; + int *fds; + } instances; + bpf_program_prep_t preprocessor; + + struct bpf_object *obj; + void *priv; + bpf_program_clear_priv_t clear_priv; + + enum bpf_attach_type expected_attach_type; + __u32 attach_btf_id; + __u32 attach_prog_fd; + void *func_info; + __u32 func_info_rec_size; + __u32 func_info_cnt; + + struct bpf_capabilities *caps; + + void *line_info; + __u32 line_info_rec_size; + __u32 line_info_cnt; + __u32 prog_flags; +}; + +enum libbpf_map_type { + LIBBPF_MAP_UNSPEC, + LIBBPF_MAP_DATA, + LIBBPF_MAP_BSS, + LIBBPF_MAP_RODATA, +}; + +static const char * const libbpf_type_to_btf_name[] = { + [LIBBPF_MAP_DATA] = ".data", + [LIBBPF_MAP_BSS] = ".bss", + [LIBBPF_MAP_RODATA] = ".rodata", +}; + +struct bpf_map { + int fd; + char *name; + int sec_idx; + size_t sec_offset; + int map_ifindex; + int inner_map_fd; + struct bpf_map_def def; + __u32 btf_key_type_id; + __u32 btf_value_type_id; + void *priv; + bpf_map_clear_priv_t clear_priv; + enum libbpf_map_type libbpf_type; + char *pin_path; + bool pinned; + bool reused; +}; + +struct bpf_secdata { + void *rodata; + void *data; +}; + +static LIST_HEAD(bpf_objects_list); + +struct bpf_object { + char name[BPF_OBJ_NAME_LEN]; + char license[64]; + __u32 kern_version; + + struct bpf_program *programs; + size_t nr_programs; + struct bpf_map *maps; + size_t nr_maps; + size_t maps_cap; + struct bpf_secdata sections; + + bool loaded; + bool has_pseudo_calls; + bool relaxed_core_relocs; + + /* + * Information when doing elf related work. Only valid if fd + * is valid. + */ + struct { + int fd; + const void *obj_buf; + size_t obj_buf_sz; + Elf *elf; + GElf_Ehdr ehdr; + Elf_Data *symbols; + Elf_Data *data; + Elf_Data *rodata; + Elf_Data *bss; + size_t strtabidx; + struct { + GElf_Shdr shdr; + Elf_Data *data; + } *reloc_sects; + int nr_reloc_sects; + int maps_shndx; + int btf_maps_shndx; + int text_shndx; + int data_shndx; + int rodata_shndx; + int bss_shndx; + } efile; + /* + * All loaded bpf_object is linked in a list, which is + * hidden to caller. bpf_objects__<func> handlers deal with + * all objects. + */ + struct list_head list; + + struct btf *btf; + struct btf_ext *btf_ext; + + void *priv; + bpf_object_clear_priv_t clear_priv; + + struct bpf_capabilities caps; + + char path[]; +}; +#define obj_elf_valid(o) ((o)->efile.elf) + +void bpf_program__unload(struct bpf_program *prog) +{ + int i; + + if (!prog) + return; + + /* + * If the object is opened but the program was never loaded, + * it is possible that prog->instances.nr == -1. + */ + if (prog->instances.nr > 0) { + for (i = 0; i < prog->instances.nr; i++) + zclose(prog->instances.fds[i]); + } else if (prog->instances.nr != -1) { + pr_warn("Internal error: instances.nr is %d\n", + prog->instances.nr); + } + + prog->instances.nr = -1; + zfree(&prog->instances.fds); + + zfree(&prog->func_info); + zfree(&prog->line_info); +} + +static void bpf_program__exit(struct bpf_program *prog) +{ + if (!prog) + return; + + if (prog->clear_priv) + prog->clear_priv(prog, prog->priv); + + prog->priv = NULL; + prog->clear_priv = NULL; + + bpf_program__unload(prog); + zfree(&prog->name); + zfree(&prog->section_name); + zfree(&prog->pin_name); + zfree(&prog->insns); + zfree(&prog->reloc_desc); + + prog->nr_reloc = 0; + prog->insns_cnt = 0; + prog->idx = -1; +} + +static char *__bpf_program__pin_name(struct bpf_program *prog) +{ + char *name, *p; + + name = p = strdup(prog->section_name); + while ((p = strchr(p, '/'))) + *p = '_'; + + return name; +} + +static int +bpf_program__init(void *data, size_t size, char *section_name, int idx, + struct bpf_program *prog) +{ + const size_t bpf_insn_sz = sizeof(struct bpf_insn); + + if (size == 0 || size % bpf_insn_sz) { + pr_warn("corrupted section '%s', size: %zu\n", + section_name, size); + return -EINVAL; + } + + memset(prog, 0, sizeof(*prog)); + + prog->section_name = strdup(section_name); + if (!prog->section_name) { + pr_warn("failed to alloc name for prog under section(%d) %s\n", + idx, section_name); + goto errout; + } + + prog->pin_name = __bpf_program__pin_name(prog); + if (!prog->pin_name) { + pr_warn("failed to alloc pin name for prog under section(%d) %s\n", + idx, section_name); + goto errout; + } + + prog->insns = malloc(size); + if (!prog->insns) { + pr_warn("failed to alloc insns for prog under section %s\n", + section_name); + goto errout; + } + prog->insns_cnt = size / bpf_insn_sz; + memcpy(prog->insns, data, size); + prog->idx = idx; + prog->instances.fds = NULL; + prog->instances.nr = -1; + prog->type = BPF_PROG_TYPE_UNSPEC; + + return 0; +errout: + bpf_program__exit(prog); + return -ENOMEM; +} + +static int +bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, + char *section_name, int idx) +{ + struct bpf_program prog, *progs; + int nr_progs, err; + + err = bpf_program__init(data, size, section_name, idx, &prog); + if (err) + return err; + + prog.caps = &obj->caps; + progs = obj->programs; + nr_progs = obj->nr_programs; + + progs = reallocarray(progs, nr_progs + 1, sizeof(progs[0])); + if (!progs) { + /* + * In this case the original obj->programs + * is still valid, so don't need special treat for + * bpf_close_object(). + */ + pr_warn("failed to alloc a new program under section '%s'\n", + section_name); + bpf_program__exit(&prog); + return -ENOMEM; + } + + pr_debug("found program %s\n", prog.section_name); + obj->programs = progs; + obj->nr_programs = nr_progs + 1; + prog.obj = obj; + progs[nr_progs] = prog; + return 0; +} + +static int +bpf_object__init_prog_names(struct bpf_object *obj) +{ + Elf_Data *symbols = obj->efile.symbols; + struct bpf_program *prog; + size_t pi, si; + + for (pi = 0; pi < obj->nr_programs; pi++) { + const char *name = NULL; + + prog = &obj->programs[pi]; + + for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name; + si++) { + GElf_Sym sym; + + if (!gelf_getsym(symbols, si, &sym)) + continue; + if (sym.st_shndx != prog->idx) + continue; + if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL) + continue; + + name = elf_strptr(obj->efile.elf, + obj->efile.strtabidx, + sym.st_name); + if (!name) { + pr_warn("failed to get sym name string for prog %s\n", + prog->section_name); + return -LIBBPF_ERRNO__LIBELF; + } + } + + if (!name && prog->idx == obj->efile.text_shndx) + name = ".text"; + + if (!name) { + pr_warn("failed to find sym for prog %s\n", + prog->section_name); + return -EINVAL; + } + + prog->name = strdup(name); + if (!prog->name) { + pr_warn("failed to allocate memory for prog sym %s\n", + name); + return -ENOMEM; + } + } + + return 0; +} + +static __u32 get_kernel_version(void) +{ + __u32 major, minor, patch; + struct utsname info; + + uname(&info); + if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3) + return 0; + return KERNEL_VERSION(major, minor, patch); +} + +static struct bpf_object *bpf_object__new(const char *path, + const void *obj_buf, + size_t obj_buf_sz, + const char *obj_name) +{ + struct bpf_object *obj; + char *end; + + obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1); + if (!obj) { + pr_warn("alloc memory failed for %s\n", path); + return ERR_PTR(-ENOMEM); + } + + strcpy(obj->path, path); + if (obj_name) { + strncpy(obj->name, obj_name, sizeof(obj->name) - 1); + obj->name[sizeof(obj->name) - 1] = 0; + } else { + /* Using basename() GNU version which doesn't modify arg. */ + strncpy(obj->name, basename((void *)path), + sizeof(obj->name) - 1); + end = strchr(obj->name, '.'); + if (end) + *end = 0; + } + + obj->efile.fd = -1; + /* + * Caller of this function should also call + * bpf_object__elf_finish() after data collection to return + * obj_buf to user. If not, we should duplicate the buffer to + * avoid user freeing them before elf finish. + */ + obj->efile.obj_buf = obj_buf; + obj->efile.obj_buf_sz = obj_buf_sz; + obj->efile.maps_shndx = -1; + obj->efile.btf_maps_shndx = -1; + obj->efile.data_shndx = -1; + obj->efile.rodata_shndx = -1; + obj->efile.bss_shndx = -1; + + obj->kern_version = get_kernel_version(); + obj->loaded = false; + + INIT_LIST_HEAD(&obj->list); + list_add(&obj->list, &bpf_objects_list); + return obj; +} + +static void bpf_object__elf_finish(struct bpf_object *obj) +{ + if (!obj_elf_valid(obj)) + return; + + if (obj->efile.elf) { + elf_end(obj->efile.elf); + obj->efile.elf = NULL; + } + obj->efile.symbols = NULL; + obj->efile.data = NULL; + obj->efile.rodata = NULL; + obj->efile.bss = NULL; + + zfree(&obj->efile.reloc_sects); + obj->efile.nr_reloc_sects = 0; + zclose(obj->efile.fd); + obj->efile.obj_buf = NULL; + obj->efile.obj_buf_sz = 0; +} + +static int bpf_object__elf_init(struct bpf_object *obj) +{ + int err = 0; + GElf_Ehdr *ep; + + if (obj_elf_valid(obj)) { + pr_warn("elf init: internal error\n"); + return -LIBBPF_ERRNO__LIBELF; + } + + if (obj->efile.obj_buf_sz > 0) { + /* + * obj_buf should have been validated by + * bpf_object__open_buffer(). + */ + obj->efile.elf = elf_memory((char *)obj->efile.obj_buf, + obj->efile.obj_buf_sz); + } else { + obj->efile.fd = open(obj->path, O_RDONLY); + if (obj->efile.fd < 0) { + char errmsg[STRERR_BUFSIZE], *cp; + + err = -errno; + cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); + pr_warn("failed to open %s: %s\n", obj->path, cp); + return err; + } + + obj->efile.elf = elf_begin(obj->efile.fd, + LIBBPF_ELF_C_READ_MMAP, NULL); + } + + if (!obj->efile.elf) { + pr_warn("failed to open %s as ELF file\n", obj->path); + err = -LIBBPF_ERRNO__LIBELF; + goto errout; + } + + if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) { + pr_warn("failed to get EHDR from %s\n", obj->path); + err = -LIBBPF_ERRNO__FORMAT; + goto errout; + } + ep = &obj->efile.ehdr; + + /* Old LLVM set e_machine to EM_NONE */ + if (ep->e_type != ET_REL || + (ep->e_machine && ep->e_machine != EM_BPF)) { + pr_warn("%s is not an eBPF object file\n", obj->path); + err = -LIBBPF_ERRNO__FORMAT; + goto errout; + } + + return 0; +errout: + bpf_object__elf_finish(obj); + return err; +} + +static int bpf_object__check_endianness(struct bpf_object *obj) +{ +#if __BYTE_ORDER == __LITTLE_ENDIAN + if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2LSB) + return 0; +#elif __BYTE_ORDER == __BIG_ENDIAN + if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2MSB) + return 0; +#else +# error "Unrecognized __BYTE_ORDER__" +#endif + pr_warn("endianness mismatch.\n"); + return -LIBBPF_ERRNO__ENDIAN; +} + +static int +bpf_object__init_license(struct bpf_object *obj, void *data, size_t size) +{ + memcpy(obj->license, data, min(size, sizeof(obj->license) - 1)); + pr_debug("license of %s is %s\n", obj->path, obj->license); + return 0; +} + +static int +bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size) +{ + __u32 kver; + + if (size != sizeof(kver)) { + pr_warn("invalid kver section in %s\n", obj->path); + return -LIBBPF_ERRNO__FORMAT; + } + memcpy(&kver, data, sizeof(kver)); + obj->kern_version = kver; + pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version); + return 0; +} + +static int compare_bpf_map(const void *_a, const void *_b) +{ + const struct bpf_map *a = _a; + const struct bpf_map *b = _b; + + if (a->sec_idx != b->sec_idx) + return a->sec_idx - b->sec_idx; + return a->sec_offset - b->sec_offset; +} + +static bool bpf_map_type__is_map_in_map(enum bpf_map_type type) +{ + if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS || + type == BPF_MAP_TYPE_HASH_OF_MAPS) + return true; + return false; +} + +static int bpf_object_search_section_size(const struct bpf_object *obj, + const char *name, size_t *d_size) +{ + const GElf_Ehdr *ep = &obj->efile.ehdr; + Elf *elf = obj->efile.elf; + Elf_Scn *scn = NULL; + int idx = 0; + + while ((scn = elf_nextscn(elf, scn)) != NULL) { + const char *sec_name; + Elf_Data *data; + GElf_Shdr sh; + + idx++; + if (gelf_getshdr(scn, &sh) != &sh) { + pr_warn("failed to get section(%d) header from %s\n", + idx, obj->path); + return -EIO; + } + + sec_name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name); + if (!sec_name) { + pr_warn("failed to get section(%d) name from %s\n", + idx, obj->path); + return -EIO; + } + + if (strcmp(name, sec_name)) + continue; + + data = elf_getdata(scn, 0); + if (!data) { + pr_warn("failed to get section(%d) data from %s(%s)\n", + idx, name, obj->path); + return -EIO; + } + + *d_size = data->d_size; + return 0; + } + + return -ENOENT; +} + +int bpf_object__section_size(const struct bpf_object *obj, const char *name, + __u32 *size) +{ + int ret = -ENOENT; + size_t d_size; + + *size = 0; + if (!name) { + return -EINVAL; + } else if (!strcmp(name, ".data")) { + if (obj->efile.data) + *size = obj->efile.data->d_size; + } else if (!strcmp(name, ".bss")) { + if (obj->efile.bss) + *size = obj->efile.bss->d_size; + } else if (!strcmp(name, ".rodata")) { + if (obj->efile.rodata) + *size = obj->efile.rodata->d_size; + } else { + ret = bpf_object_search_section_size(obj, name, &d_size); + if (!ret) + *size = d_size; + } + + return *size ? 0 : ret; +} + +int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, + __u32 *off) +{ + Elf_Data *symbols = obj->efile.symbols; + const char *sname; + size_t si; + + if (!name || !off) + return -EINVAL; + + for (si = 0; si < symbols->d_size / sizeof(GElf_Sym); si++) { + GElf_Sym sym; + + if (!gelf_getsym(symbols, si, &sym)) + continue; + if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL || + GELF_ST_TYPE(sym.st_info) != STT_OBJECT) + continue; + + sname = elf_strptr(obj->efile.elf, obj->efile.strtabidx, + sym.st_name); + if (!sname) { + pr_warn("failed to get sym name string for var %s\n", + name); + return -EIO; + } + if (strcmp(name, sname) == 0) { + *off = sym.st_value; + return 0; + } + } + + return -ENOENT; +} + +static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) +{ + struct bpf_map *new_maps; + size_t new_cap; + int i; + + if (obj->nr_maps < obj->maps_cap) + return &obj->maps[obj->nr_maps++]; + + new_cap = max((size_t)4, obj->maps_cap * 3 / 2); + new_maps = realloc(obj->maps, new_cap * sizeof(*obj->maps)); + if (!new_maps) { + pr_warn("alloc maps for object failed\n"); + return ERR_PTR(-ENOMEM); + } + + obj->maps_cap = new_cap; + obj->maps = new_maps; + + /* zero out new maps */ + memset(obj->maps + obj->nr_maps, 0, + (obj->maps_cap - obj->nr_maps) * sizeof(*obj->maps)); + /* + * fill all fd with -1 so won't close incorrect fd (fd=0 is stdin) + * when failure (zclose won't close negative fd)). + */ + for (i = obj->nr_maps; i < obj->maps_cap; i++) { + obj->maps[i].fd = -1; + obj->maps[i].inner_map_fd = -1; + } + + return &obj->maps[obj->nr_maps++]; +} + +static int +bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, + int sec_idx, Elf_Data *data, void **data_buff) +{ + char map_name[BPF_OBJ_NAME_LEN]; + struct bpf_map_def *def; + struct bpf_map *map; + + map = bpf_object__add_map(obj); + if (IS_ERR(map)) + return PTR_ERR(map); + + map->libbpf_type = type; + map->sec_idx = sec_idx; + map->sec_offset = 0; + snprintf(map_name, sizeof(map_name), "%.8s%.7s", obj->name, + libbpf_type_to_btf_name[type]); + map->name = strdup(map_name); + if (!map->name) { + pr_warn("failed to alloc map name\n"); + return -ENOMEM; + } + + def = &map->def; + def->type = BPF_MAP_TYPE_ARRAY; + def->key_size = sizeof(int); + def->value_size = data->d_size; + def->max_entries = 1; + def->map_flags = type == LIBBPF_MAP_RODATA ? BPF_F_RDONLY_PROG : 0; + if (obj->caps.array_mmap) + def->map_flags |= BPF_F_MMAPABLE; + + pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n", + map_name, map->sec_idx, map->sec_offset, def->map_flags); + + if (data_buff) { + *data_buff = malloc(data->d_size); + if (!*data_buff) { + zfree(&map->name); + pr_warn("failed to alloc map content buffer\n"); + return -ENOMEM; + } + memcpy(*data_buff, data->d_buf, data->d_size); + } + + pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name); + return 0; +} + +static int bpf_object__init_global_data_maps(struct bpf_object *obj) +{ + int err; + + if (!obj->caps.global_data) + return 0; + /* + * Populate obj->maps with libbpf internal maps. + */ + if (obj->efile.data_shndx >= 0) { + err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA, + obj->efile.data_shndx, + obj->efile.data, + &obj->sections.data); + if (err) + return err; + } + if (obj->efile.rodata_shndx >= 0) { + err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA, + obj->efile.rodata_shndx, + obj->efile.rodata, + &obj->sections.rodata); + if (err) + return err; + } + if (obj->efile.bss_shndx >= 0) { + err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS, + obj->efile.bss_shndx, + obj->efile.bss, NULL); + if (err) + return err; + } + return 0; +} + +static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) +{ + Elf_Data *symbols = obj->efile.symbols; + int i, map_def_sz = 0, nr_maps = 0, nr_syms; + Elf_Data *data = NULL; + Elf_Scn *scn; + + if (obj->efile.maps_shndx < 0) + return 0; + + if (!symbols) + return -EINVAL; + + scn = elf_getscn(obj->efile.elf, obj->efile.maps_shndx); + if (scn) + data = elf_getdata(scn, NULL); + if (!scn || !data) { + pr_warn("failed to get Elf_Data from map section %d\n", + obj->efile.maps_shndx); + return -EINVAL; + } + + /* + * Count number of maps. Each map has a name. + * Array of maps is not supported: only the first element is + * considered. + * + * TODO: Detect array of map and report error. + */ + nr_syms = symbols->d_size / sizeof(GElf_Sym); + for (i = 0; i < nr_syms; i++) { + GElf_Sym sym; + + if (!gelf_getsym(symbols, i, &sym)) + continue; + if (sym.st_shndx != obj->efile.maps_shndx) + continue; + nr_maps++; + } + /* Assume equally sized map definitions */ + pr_debug("maps in %s: %d maps in %zd bytes\n", + obj->path, nr_maps, data->d_size); + + if (!data->d_size || nr_maps == 0 || (data->d_size % nr_maps) != 0) { + pr_warn("unable to determine map definition size section %s, %d maps in %zd bytes\n", + obj->path, nr_maps, data->d_size); + return -EINVAL; + } + map_def_sz = data->d_size / nr_maps; + + /* Fill obj->maps using data in "maps" section. */ + for (i = 0; i < nr_syms; i++) { + GElf_Sym sym; + const char *map_name; + struct bpf_map_def *def; + struct bpf_map *map; + + if (!gelf_getsym(symbols, i, &sym)) + continue; + if (sym.st_shndx != obj->efile.maps_shndx) + continue; + + map = bpf_object__add_map(obj); + if (IS_ERR(map)) + return PTR_ERR(map); + + map_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, + sym.st_name); + if (!map_name) { + pr_warn("failed to get map #%d name sym string for obj %s\n", + i, obj->path); + return -LIBBPF_ERRNO__FORMAT; + } + + map->libbpf_type = LIBBPF_MAP_UNSPEC; + map->sec_idx = sym.st_shndx; + map->sec_offset = sym.st_value; + pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n", + map_name, map->sec_idx, map->sec_offset); + if (sym.st_value + map_def_sz > data->d_size) { + pr_warn("corrupted maps section in %s: last map \"%s\" too small\n", + obj->path, map_name); + return -EINVAL; + } + + map->name = strdup(map_name); + if (!map->name) { + pr_warn("failed to alloc map name\n"); + return -ENOMEM; + } + pr_debug("map %d is \"%s\"\n", i, map->name); + def = (struct bpf_map_def *)(data->d_buf + sym.st_value); + /* + * If the definition of the map in the object file fits in + * bpf_map_def, copy it. Any extra fields in our version + * of bpf_map_def will default to zero as a result of the + * calloc above. + */ + if (map_def_sz <= sizeof(struct bpf_map_def)) { + memcpy(&map->def, def, map_def_sz); + } else { + /* + * Here the map structure being read is bigger than what + * we expect, truncate if the excess bits are all zero. + * If they are not zero, reject this map as + * incompatible. + */ + char *b; + + for (b = ((char *)def) + sizeof(struct bpf_map_def); + b < ((char *)def) + map_def_sz; b++) { + if (*b != 0) { + pr_warn("maps section in %s: \"%s\" has unrecognized, non-zero options\n", + obj->path, map_name); + if (strict) + return -EINVAL; + } + } + memcpy(&map->def, def, sizeof(struct bpf_map_def)); + } + } + return 0; +} + +static const struct btf_type * +skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id) +{ + const struct btf_type *t = btf__type_by_id(btf, id); + + if (res_id) + *res_id = id; + + while (btf_is_mod(t) || btf_is_typedef(t)) { + if (res_id) + *res_id = t->type; + t = btf__type_by_id(btf, t->type); + } + + return t; +} + +/* + * Fetch integer attribute of BTF map definition. Such attributes are + * represented using a pointer to an array, in which dimensionality of array + * encodes specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY]; + * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF + * type definition, while using only sizeof(void *) space in ELF data section. + */ +static bool get_map_field_int(const char *map_name, const struct btf *btf, + const struct btf_type *def, + const struct btf_member *m, __u32 *res) +{ + const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL); + const char *name = btf__name_by_offset(btf, m->name_off); + const struct btf_array *arr_info; + const struct btf_type *arr_t; + + if (!btf_is_ptr(t)) { + pr_warn("map '%s': attr '%s': expected PTR, got %u.\n", + map_name, name, btf_kind(t)); + return false; + } + + arr_t = btf__type_by_id(btf, t->type); + if (!arr_t) { + pr_warn("map '%s': attr '%s': type [%u] not found.\n", + map_name, name, t->type); + return false; + } + if (!btf_is_array(arr_t)) { + pr_warn("map '%s': attr '%s': expected ARRAY, got %u.\n", + map_name, name, btf_kind(arr_t)); + return false; + } + arr_info = btf_array(arr_t); + *res = arr_info->nelems; + return true; +} + +static int build_map_pin_path(struct bpf_map *map, const char *path) +{ + char buf[PATH_MAX]; + int err, len; + + if (!path) + path = "/sys/fs/bpf"; + + len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map)); + if (len < 0) + return -EINVAL; + else if (len >= PATH_MAX) + return -ENAMETOOLONG; + + err = bpf_map__set_pin_path(map, buf); + if (err) + return err; + + return 0; +} + +static int bpf_object__init_user_btf_map(struct bpf_object *obj, + const struct btf_type *sec, + int var_idx, int sec_idx, + const Elf_Data *data, bool strict, + const char *pin_root_path) +{ + const struct btf_type *var, *def, *t; + const struct btf_var_secinfo *vi; + const struct btf_var *var_extra; + const struct btf_member *m; + const char *map_name; + struct bpf_map *map; + int vlen, i; + + vi = btf_var_secinfos(sec) + var_idx; + var = btf__type_by_id(obj->btf, vi->type); + var_extra = btf_var(var); + map_name = btf__name_by_offset(obj->btf, var->name_off); + vlen = btf_vlen(var); + + if (map_name == NULL || map_name[0] == '\0') { + pr_warn("map #%d: empty name.\n", var_idx); + return -EINVAL; + } + if ((__u64)vi->offset + vi->size > data->d_size) { + pr_warn("map '%s' BTF data is corrupted.\n", map_name); + return -EINVAL; + } + if (!btf_is_var(var)) { + pr_warn("map '%s': unexpected var kind %u.\n", + map_name, btf_kind(var)); + return -EINVAL; + } + if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED && + var_extra->linkage != BTF_VAR_STATIC) { + pr_warn("map '%s': unsupported var linkage %u.\n", + map_name, var_extra->linkage); + return -EOPNOTSUPP; + } + + def = skip_mods_and_typedefs(obj->btf, var->type, NULL); + if (!btf_is_struct(def)) { + pr_warn("map '%s': unexpected def kind %u.\n", + map_name, btf_kind(var)); + return -EINVAL; + } + if (def->size > vi->size) { + pr_warn("map '%s': invalid def size.\n", map_name); + return -EINVAL; + } + + map = bpf_object__add_map(obj); + if (IS_ERR(map)) + return PTR_ERR(map); + map->name = strdup(map_name); + if (!map->name) { + pr_warn("map '%s': failed to alloc map name.\n", map_name); + return -ENOMEM; + } + map->libbpf_type = LIBBPF_MAP_UNSPEC; + map->def.type = BPF_MAP_TYPE_UNSPEC; + map->sec_idx = sec_idx; + map->sec_offset = vi->offset; + pr_debug("map '%s': at sec_idx %d, offset %zu.\n", + map_name, map->sec_idx, map->sec_offset); + + vlen = btf_vlen(def); + m = btf_members(def); + for (i = 0; i < vlen; i++, m++) { + const char *name = btf__name_by_offset(obj->btf, m->name_off); + + if (!name) { + pr_warn("map '%s': invalid field #%d.\n", map_name, i); + return -EINVAL; + } + if (strcmp(name, "type") == 0) { + if (!get_map_field_int(map_name, obj->btf, def, m, + &map->def.type)) + return -EINVAL; + pr_debug("map '%s': found type = %u.\n", + map_name, map->def.type); + } else if (strcmp(name, "max_entries") == 0) { + if (!get_map_field_int(map_name, obj->btf, def, m, + &map->def.max_entries)) + return -EINVAL; + pr_debug("map '%s': found max_entries = %u.\n", + map_name, map->def.max_entries); + } else if (strcmp(name, "map_flags") == 0) { + if (!get_map_field_int(map_name, obj->btf, def, m, + &map->def.map_flags)) + return -EINVAL; + pr_debug("map '%s': found map_flags = %u.\n", + map_name, map->def.map_flags); + } else if (strcmp(name, "key_size") == 0) { + __u32 sz; + + if (!get_map_field_int(map_name, obj->btf, def, m, + &sz)) + return -EINVAL; + pr_debug("map '%s': found key_size = %u.\n", + map_name, sz); + if (map->def.key_size && map->def.key_size != sz) { + pr_warn("map '%s': conflicting key size %u != %u.\n", + map_name, map->def.key_size, sz); + return -EINVAL; + } + map->def.key_size = sz; + } else if (strcmp(name, "key") == 0) { + __s64 sz; + + t = btf__type_by_id(obj->btf, m->type); + if (!t) { + pr_warn("map '%s': key type [%d] not found.\n", + map_name, m->type); + return -EINVAL; + } + if (!btf_is_ptr(t)) { + pr_warn("map '%s': key spec is not PTR: %u.\n", + map_name, btf_kind(t)); + return -EINVAL; + } + sz = btf__resolve_size(obj->btf, t->type); + if (sz < 0) { + pr_warn("map '%s': can't determine key size for type [%u]: %lld.\n", + map_name, t->type, sz); + return sz; + } + pr_debug("map '%s': found key [%u], sz = %lld.\n", + map_name, t->type, sz); + if (map->def.key_size && map->def.key_size != sz) { + pr_warn("map '%s': conflicting key size %u != %lld.\n", + map_name, map->def.key_size, sz); + return -EINVAL; + } + map->def.key_size = sz; + map->btf_key_type_id = t->type; + } else if (strcmp(name, "value_size") == 0) { + __u32 sz; + + if (!get_map_field_int(map_name, obj->btf, def, m, + &sz)) + return -EINVAL; + pr_debug("map '%s': found value_size = %u.\n", + map_name, sz); + if (map->def.value_size && map->def.value_size != sz) { + pr_warn("map '%s': conflicting value size %u != %u.\n", + map_name, map->def.value_size, sz); + return -EINVAL; + } + map->def.value_size = sz; + } else if (strcmp(name, "value") == 0) { + __s64 sz; + + t = btf__type_by_id(obj->btf, m->type); + if (!t) { + pr_warn("map '%s': value type [%d] not found.\n", + map_name, m->type); + return -EINVAL; + } + if (!btf_is_ptr(t)) { + pr_warn("map '%s': value spec is not PTR: %u.\n", + map_name, btf_kind(t)); + return -EINVAL; + } + sz = btf__resolve_size(obj->btf, t->type); + if (sz < 0) { + pr_warn("map '%s': can't determine value size for type [%u]: %lld.\n", + map_name, t->type, sz); + return sz; + } + pr_debug("map '%s': found value [%u], sz = %lld.\n", + map_name, t->type, sz); + if (map->def.value_size && map->def.value_size != sz) { + pr_warn("map '%s': conflicting value size %u != %lld.\n", + map_name, map->def.value_size, sz); + return -EINVAL; + } + map->def.value_size = sz; + map->btf_value_type_id = t->type; + } else if (strcmp(name, "pinning") == 0) { + __u32 val; + int err; + + if (!get_map_field_int(map_name, obj->btf, def, m, + &val)) + return -EINVAL; + pr_debug("map '%s': found pinning = %u.\n", + map_name, val); + + if (val != LIBBPF_PIN_NONE && + val != LIBBPF_PIN_BY_NAME) { + pr_warn("map '%s': invalid pinning value %u.\n", + map_name, val); + return -EINVAL; + } + if (val == LIBBPF_PIN_BY_NAME) { + err = build_map_pin_path(map, pin_root_path); + if (err) { + pr_warn("map '%s': couldn't build pin path.\n", + map_name); + return err; + } + } + } else { + if (strict) { + pr_warn("map '%s': unknown field '%s'.\n", + map_name, name); + return -ENOTSUP; + } + pr_debug("map '%s': ignoring unknown field '%s'.\n", + map_name, name); + } + } + + if (map->def.type == BPF_MAP_TYPE_UNSPEC) { + pr_warn("map '%s': map type isn't specified.\n", map_name); + return -EINVAL; + } + + return 0; +} + +static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, + const char *pin_root_path) +{ + const struct btf_type *sec = NULL; + int nr_types, i, vlen, err; + const struct btf_type *t; + const char *name; + Elf_Data *data; + Elf_Scn *scn; + + if (obj->efile.btf_maps_shndx < 0) + return 0; + + scn = elf_getscn(obj->efile.elf, obj->efile.btf_maps_shndx); + if (scn) + data = elf_getdata(scn, NULL); + if (!scn || !data) { + pr_warn("failed to get Elf_Data from map section %d (%s)\n", + obj->efile.maps_shndx, MAPS_ELF_SEC); + return -EINVAL; + } + + nr_types = btf__get_nr_types(obj->btf); + for (i = 1; i <= nr_types; i++) { + t = btf__type_by_id(obj->btf, i); + if (!btf_is_datasec(t)) + continue; + name = btf__name_by_offset(obj->btf, t->name_off); + if (strcmp(name, MAPS_ELF_SEC) == 0) { + sec = t; + break; + } + } + + if (!sec) { + pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC); + return -ENOENT; + } + + vlen = btf_vlen(sec); + for (i = 0; i < vlen; i++) { + err = bpf_object__init_user_btf_map(obj, sec, i, + obj->efile.btf_maps_shndx, + data, strict, + pin_root_path); + if (err) + return err; + } + + return 0; +} + +static int bpf_object__init_maps(struct bpf_object *obj, bool relaxed_maps, + const char *pin_root_path) +{ + bool strict = !relaxed_maps; + int err; + + err = bpf_object__init_user_maps(obj, strict); + if (err) + return err; + + err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path); + if (err) + return err; + + err = bpf_object__init_global_data_maps(obj); + if (err) + return err; + + if (obj->nr_maps) { + qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]), + compare_bpf_map); + } + return 0; +} + +static bool section_have_execinstr(struct bpf_object *obj, int idx) +{ + Elf_Scn *scn; + GElf_Shdr sh; + + scn = elf_getscn(obj->efile.elf, idx); + if (!scn) + return false; + + if (gelf_getshdr(scn, &sh) != &sh) + return false; + + if (sh.sh_flags & SHF_EXECINSTR) + return true; + + return false; +} + +static void bpf_object__sanitize_btf(struct bpf_object *obj) +{ + bool has_datasec = obj->caps.btf_datasec; + bool has_func = obj->caps.btf_func; + struct btf *btf = obj->btf; + struct btf_type *t; + int i, j, vlen; + + if (!obj->btf || (has_func && has_datasec)) + return; + + for (i = 1; i <= btf__get_nr_types(btf); i++) { + t = (struct btf_type *)btf__type_by_id(btf, i); + + if (!has_datasec && btf_is_var(t)) { + /* replace VAR with INT */ + t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0); + /* + * using size = 1 is the safest choice, 4 will be too + * big and cause kernel BTF validation failure if + * original variable took less than 4 bytes + */ + t->size = 1; + *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8); + } else if (!has_datasec && btf_is_datasec(t)) { + /* replace DATASEC with STRUCT */ + const struct btf_var_secinfo *v = btf_var_secinfos(t); + struct btf_member *m = btf_members(t); + struct btf_type *vt; + char *name; + + name = (char *)btf__name_by_offset(btf, t->name_off); + while (*name) { + if (*name == '.') + *name = '_'; + name++; + } + + vlen = btf_vlen(t); + t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen); + for (j = 0; j < vlen; j++, v++, m++) { + /* order of field assignments is important */ + m->offset = v->offset * 8; + m->type = v->type; + /* preserve variable name as member name */ + vt = (void *)btf__type_by_id(btf, v->type); + m->name_off = vt->name_off; + } + } else if (!has_func && btf_is_func_proto(t)) { + /* replace FUNC_PROTO with ENUM */ + vlen = btf_vlen(t); + t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen); + t->size = sizeof(__u32); /* kernel enforced */ + } else if (!has_func && btf_is_func(t)) { + /* replace FUNC with TYPEDEF */ + t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0); + } + } +} + +static void bpf_object__sanitize_btf_ext(struct bpf_object *obj) +{ + if (!obj->btf_ext) + return; + + if (!obj->caps.btf_func) { + btf_ext__free(obj->btf_ext); + obj->btf_ext = NULL; + } +} + +static bool bpf_object__is_btf_mandatory(const struct bpf_object *obj) +{ + return obj->efile.btf_maps_shndx >= 0; +} + +static int bpf_object__init_btf(struct bpf_object *obj, + Elf_Data *btf_data, + Elf_Data *btf_ext_data) +{ + bool btf_required = bpf_object__is_btf_mandatory(obj); + int err = 0; + + if (btf_data) { + obj->btf = btf__new(btf_data->d_buf, btf_data->d_size); + if (IS_ERR(obj->btf)) { + pr_warn("Error loading ELF section %s: %d.\n", + BTF_ELF_SEC, err); + goto out; + } + err = btf__finalize_data(obj, obj->btf); + if (err) { + pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err); + goto out; + } + } + if (btf_ext_data) { + if (!obj->btf) { + pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n", + BTF_EXT_ELF_SEC, BTF_ELF_SEC); + goto out; + } + obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, + btf_ext_data->d_size); + if (IS_ERR(obj->btf_ext)) { + pr_warn("Error loading ELF section %s: %ld. Ignored and continue.\n", + BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext)); + obj->btf_ext = NULL; + goto out; + } + } +out: + if (err || IS_ERR(obj->btf)) { + if (btf_required) + err = err ? : PTR_ERR(obj->btf); + else + err = 0; + if (!IS_ERR_OR_NULL(obj->btf)) + btf__free(obj->btf); + obj->btf = NULL; + } + if (btf_required && !obj->btf) { + pr_warn("BTF is required, but is missing or corrupted.\n"); + return err == 0 ? -ENOENT : err; + } + return 0; +} + +static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) +{ + int err = 0; + + if (!obj->btf) + return 0; + + bpf_object__sanitize_btf(obj); + bpf_object__sanitize_btf_ext(obj); + + err = btf__load(obj->btf); + if (err) { + pr_warn("Error loading %s into kernel: %d.\n", + BTF_ELF_SEC, err); + btf__free(obj->btf); + obj->btf = NULL; + /* btf_ext can't exist without btf, so free it as well */ + if (obj->btf_ext) { + btf_ext__free(obj->btf_ext); + obj->btf_ext = NULL; + } + + if (bpf_object__is_btf_mandatory(obj)) + return err; + } + return 0; +} + +static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps, + const char *pin_root_path) +{ + Elf *elf = obj->efile.elf; + GElf_Ehdr *ep = &obj->efile.ehdr; + Elf_Data *btf_ext_data = NULL; + Elf_Data *btf_data = NULL; + Elf_Scn *scn = NULL; + int idx = 0, err = 0; + + /* Elf is corrupted/truncated, avoid calling elf_strptr. */ + if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) { + pr_warn("failed to get e_shstrndx from %s\n", obj->path); + return -LIBBPF_ERRNO__FORMAT; + } + + while ((scn = elf_nextscn(elf, scn)) != NULL) { + char *name; + GElf_Shdr sh; + Elf_Data *data; + + idx++; + if (gelf_getshdr(scn, &sh) != &sh) { + pr_warn("failed to get section(%d) header from %s\n", + idx, obj->path); + return -LIBBPF_ERRNO__FORMAT; + } + + name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name); + if (!name) { + pr_warn("failed to get section(%d) name from %s\n", + idx, obj->path); + return -LIBBPF_ERRNO__FORMAT; + } + + data = elf_getdata(scn, 0); + if (!data) { + pr_warn("failed to get section(%d) data from %s(%s)\n", + idx, name, obj->path); + return -LIBBPF_ERRNO__FORMAT; + } + pr_debug("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n", + idx, name, (unsigned long)data->d_size, + (int)sh.sh_link, (unsigned long)sh.sh_flags, + (int)sh.sh_type); + + if (strcmp(name, "license") == 0) { + err = bpf_object__init_license(obj, + data->d_buf, + data->d_size); + if (err) + return err; + } else if (strcmp(name, "version") == 0) { + err = bpf_object__init_kversion(obj, + data->d_buf, + data->d_size); + if (err) + return err; + } else if (strcmp(name, "maps") == 0) { + obj->efile.maps_shndx = idx; + } else if (strcmp(name, MAPS_ELF_SEC) == 0) { + obj->efile.btf_maps_shndx = idx; + } else if (strcmp(name, BTF_ELF_SEC) == 0) { + btf_data = data; + } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { + btf_ext_data = data; + } else if (sh.sh_type == SHT_SYMTAB) { + if (obj->efile.symbols) { + pr_warn("bpf: multiple SYMTAB in %s\n", + obj->path); + return -LIBBPF_ERRNO__FORMAT; + } + obj->efile.symbols = data; + obj->efile.strtabidx = sh.sh_link; + } else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) { + if (sh.sh_flags & SHF_EXECINSTR) { + if (strcmp(name, ".text") == 0) + obj->efile.text_shndx = idx; + err = bpf_object__add_program(obj, data->d_buf, + data->d_size, + name, idx); + if (err) { + char errmsg[STRERR_BUFSIZE]; + char *cp; + + cp = libbpf_strerror_r(-err, errmsg, + sizeof(errmsg)); + pr_warn("failed to alloc program %s (%s): %s", + name, obj->path, cp); + return err; + } + } else if (strcmp(name, ".data") == 0) { + obj->efile.data = data; + obj->efile.data_shndx = idx; + } else if (strcmp(name, ".rodata") == 0) { + obj->efile.rodata = data; + obj->efile.rodata_shndx = idx; + } else { + pr_debug("skip section(%d) %s\n", idx, name); + } + } else if (sh.sh_type == SHT_REL) { + int nr_sects = obj->efile.nr_reloc_sects; + void *sects = obj->efile.reloc_sects; + int sec = sh.sh_info; /* points to other section */ + + /* Only do relo for section with exec instructions */ + if (!section_have_execinstr(obj, sec)) { + pr_debug("skip relo %s(%d) for section(%d)\n", + name, idx, sec); + continue; + } + + sects = reallocarray(sects, nr_sects + 1, + sizeof(*obj->efile.reloc_sects)); + if (!sects) { + pr_warn("reloc_sects realloc failed\n"); + return -ENOMEM; + } + + obj->efile.reloc_sects = sects; + obj->efile.nr_reloc_sects++; + + obj->efile.reloc_sects[nr_sects].shdr = sh; + obj->efile.reloc_sects[nr_sects].data = data; + } else if (sh.sh_type == SHT_NOBITS && strcmp(name, ".bss") == 0) { + obj->efile.bss = data; + obj->efile.bss_shndx = idx; + } else { + pr_debug("skip section(%d) %s\n", idx, name); + } + } + + if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) { + pr_warn("Corrupted ELF file: index of strtab invalid\n"); + return -LIBBPF_ERRNO__FORMAT; + } + err = bpf_object__init_btf(obj, btf_data, btf_ext_data); + if (!err) + err = bpf_object__init_maps(obj, relaxed_maps, pin_root_path); + if (!err) + err = bpf_object__sanitize_and_load_btf(obj); + if (!err) + err = bpf_object__init_prog_names(obj); + return err; +} + +static struct bpf_program * +bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx) +{ + struct bpf_program *prog; + size_t i; + + for (i = 0; i < obj->nr_programs; i++) { + prog = &obj->programs[i]; + if (prog->idx == idx) + return prog; + } + return NULL; +} + +struct bpf_program * +bpf_object__find_program_by_title(const struct bpf_object *obj, + const char *title) +{ + struct bpf_program *pos; + + bpf_object__for_each_program(pos, obj) { + if (pos->section_name && !strcmp(pos->section_name, title)) + return pos; + } + return NULL; +} + +static bool bpf_object__shndx_is_data(const struct bpf_object *obj, + int shndx) +{ + return shndx == obj->efile.data_shndx || + shndx == obj->efile.bss_shndx || + shndx == obj->efile.rodata_shndx; +} + +static bool bpf_object__shndx_is_maps(const struct bpf_object *obj, + int shndx) +{ + return shndx == obj->efile.maps_shndx || + shndx == obj->efile.btf_maps_shndx; +} + +static enum libbpf_map_type +bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx) +{ + if (shndx == obj->efile.data_shndx) + return LIBBPF_MAP_DATA; + else if (shndx == obj->efile.bss_shndx) + return LIBBPF_MAP_BSS; + else if (shndx == obj->efile.rodata_shndx) + return LIBBPF_MAP_RODATA; + else + return LIBBPF_MAP_UNSPEC; +} + +static int bpf_program__record_reloc(struct bpf_program *prog, + struct reloc_desc *reloc_desc, + __u32 insn_idx, const char *name, + const GElf_Sym *sym, const GElf_Rel *rel) +{ + struct bpf_insn *insn = &prog->insns[insn_idx]; + size_t map_idx, nr_maps = prog->obj->nr_maps; + struct bpf_object *obj = prog->obj; + __u32 shdr_idx = sym->st_shndx; + enum libbpf_map_type type; + struct bpf_map *map; + + /* sub-program call relocation */ + if (insn->code == (BPF_JMP | BPF_CALL)) { + if (insn->src_reg != BPF_PSEUDO_CALL) { + pr_warn("incorrect bpf_call opcode\n"); + return -LIBBPF_ERRNO__RELOC; + } + /* text_shndx can be 0, if no default "main" program exists */ + if (!shdr_idx || shdr_idx != obj->efile.text_shndx) { + pr_warn("bad call relo against section %u\n", shdr_idx); + return -LIBBPF_ERRNO__RELOC; + } + if (sym->st_value % 8) { + pr_warn("bad call relo offset: %llu\n", (__u64)sym->st_value); + return -LIBBPF_ERRNO__RELOC; + } + reloc_desc->type = RELO_CALL; + reloc_desc->insn_idx = insn_idx; + reloc_desc->sym_off = sym->st_value; + obj->has_pseudo_calls = true; + return 0; + } + + if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) { + pr_warn("invalid relo for insns[%d].code 0x%x\n", + insn_idx, insn->code); + return -LIBBPF_ERRNO__RELOC; + } + if (!shdr_idx || shdr_idx >= SHN_LORESERVE) { + pr_warn("invalid relo for \'%s\' in special section 0x%x; forgot to initialize global var?..\n", + name, shdr_idx); + return -LIBBPF_ERRNO__RELOC; + } + + type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx); + + /* generic map reference relocation */ + if (type == LIBBPF_MAP_UNSPEC) { + if (!bpf_object__shndx_is_maps(obj, shdr_idx)) { + pr_warn("bad map relo against section %u\n", + shdr_idx); + return -LIBBPF_ERRNO__RELOC; + } + for (map_idx = 0; map_idx < nr_maps; map_idx++) { + map = &obj->maps[map_idx]; + if (map->libbpf_type != type || + map->sec_idx != sym->st_shndx || + map->sec_offset != sym->st_value) + continue; + pr_debug("found map %zd (%s, sec %d, off %zu) for insn %u\n", + map_idx, map->name, map->sec_idx, + map->sec_offset, insn_idx); + break; + } + if (map_idx >= nr_maps) { + pr_warn("map relo failed to find map for sec %u, off %llu\n", + shdr_idx, (__u64)sym->st_value); + return -LIBBPF_ERRNO__RELOC; + } + reloc_desc->type = RELO_LD64; + reloc_desc->insn_idx = insn_idx; + reloc_desc->map_idx = map_idx; + reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */ + return 0; + } + + /* global data map relocation */ + if (!bpf_object__shndx_is_data(obj, shdr_idx)) { + pr_warn("bad data relo against section %u\n", shdr_idx); + return -LIBBPF_ERRNO__RELOC; + } + if (!obj->caps.global_data) { + pr_warn("relocation: kernel does not support global \'%s\' variable access in insns[%d]\n", + name, insn_idx); + return -LIBBPF_ERRNO__RELOC; + } + for (map_idx = 0; map_idx < nr_maps; map_idx++) { + map = &obj->maps[map_idx]; + if (map->libbpf_type != type) + continue; + pr_debug("found data map %zd (%s, sec %d, off %zu) for insn %u\n", + map_idx, map->name, map->sec_idx, map->sec_offset, + insn_idx); + break; + } + if (map_idx >= nr_maps) { + pr_warn("data relo failed to find map for sec %u\n", + shdr_idx); + return -LIBBPF_ERRNO__RELOC; + } + + reloc_desc->type = RELO_DATA; + reloc_desc->insn_idx = insn_idx; + reloc_desc->map_idx = map_idx; + reloc_desc->sym_off = sym->st_value; + return 0; +} + +static int +bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, + Elf_Data *data, struct bpf_object *obj) +{ + Elf_Data *symbols = obj->efile.symbols; + int err, i, nrels; + + pr_debug("collecting relocating info for: '%s'\n", prog->section_name); + nrels = shdr->sh_size / shdr->sh_entsize; + + prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels); + if (!prog->reloc_desc) { + pr_warn("failed to alloc memory in relocation\n"); + return -ENOMEM; + } + prog->nr_reloc = nrels; + + for (i = 0; i < nrels; i++) { + const char *name; + __u32 insn_idx; + GElf_Sym sym; + GElf_Rel rel; + + if (!gelf_getrel(data, i, &rel)) { + pr_warn("relocation: failed to get %d reloc\n", i); + return -LIBBPF_ERRNO__FORMAT; + } + if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) { + pr_warn("relocation: symbol %"PRIx64" not found\n", + GELF_R_SYM(rel.r_info)); + return -LIBBPF_ERRNO__FORMAT; + } + if (rel.r_offset % sizeof(struct bpf_insn)) + return -LIBBPF_ERRNO__FORMAT; + + insn_idx = rel.r_offset / sizeof(struct bpf_insn); + name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, + sym.st_name) ? : "<?>"; + + pr_debug("relo for shdr %u, symb %llu, value %llu, type %d, bind %d, name %d (\'%s\'), insn %u\n", + (__u32)sym.st_shndx, (__u64)GELF_R_SYM(rel.r_info), + (__u64)sym.st_value, GELF_ST_TYPE(sym.st_info), + GELF_ST_BIND(sym.st_info), sym.st_name, name, + insn_idx); + + err = bpf_program__record_reloc(prog, &prog->reloc_desc[i], + insn_idx, name, &sym, &rel); + if (err) + return err; + } + return 0; +} + +static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map) +{ + struct bpf_map_def *def = &map->def; + __u32 key_type_id = 0, value_type_id = 0; + int ret; + + /* if it's BTF-defined map, we don't need to search for type IDs */ + if (map->sec_idx == obj->efile.btf_maps_shndx) + return 0; + + if (!bpf_map__is_internal(map)) { + ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size, + def->value_size, &key_type_id, + &value_type_id); + } else { + /* + * LLVM annotates global data differently in BTF, that is, + * only as '.data', '.bss' or '.rodata'. + */ + ret = btf__find_by_name(obj->btf, + libbpf_type_to_btf_name[map->libbpf_type]); + } + if (ret < 0) + return ret; + + map->btf_key_type_id = key_type_id; + map->btf_value_type_id = bpf_map__is_internal(map) ? + ret : value_type_id; + return 0; +} + +int bpf_map__reuse_fd(struct bpf_map *map, int fd) +{ + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + int new_fd, err; + char *new_name; + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err) + return err; + + new_name = strdup(info.name); + if (!new_name) + return -errno; + + new_fd = open("/", O_RDONLY | O_CLOEXEC); + if (new_fd < 0) { + err = -errno; + goto err_free_new_name; + } + + new_fd = dup3(fd, new_fd, O_CLOEXEC); + if (new_fd < 0) { + err = -errno; + goto err_close_new_fd; + } + + err = zclose(map->fd); + if (err) { + err = -errno; + goto err_close_new_fd; + } + free(map->name); + + map->fd = new_fd; + map->name = new_name; + map->def.type = info.type; + map->def.key_size = info.key_size; + map->def.value_size = info.value_size; + map->def.max_entries = info.max_entries; + map->def.map_flags = info.map_flags; + map->btf_key_type_id = info.btf_key_type_id; + map->btf_value_type_id = info.btf_value_type_id; + map->reused = true; + + return 0; + +err_close_new_fd: + close(new_fd); +err_free_new_name: + free(new_name); + return err; +} + +int bpf_map__resize(struct bpf_map *map, __u32 max_entries) +{ + if (!map || !max_entries) + return -EINVAL; + + /* If map already created, its attributes can't be changed. */ + if (map->fd >= 0) + return -EBUSY; + + map->def.max_entries = max_entries; + + return 0; +} + +static int +bpf_object__probe_name(struct bpf_object *obj) +{ + struct bpf_load_program_attr attr; + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int ret; + + /* make sure basic loading works */ + + memset(&attr, 0, sizeof(attr)); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.insns = insns; + attr.insns_cnt = ARRAY_SIZE(insns); + attr.license = "GPL"; + + ret = bpf_load_program_xattr(&attr, NULL, 0); + if (ret < 0) { + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); + pr_warn("Error in %s():%s(%d). Couldn't load basic 'r0 = 0' BPF program.\n", + __func__, cp, errno); + return -errno; + } + close(ret); + + /* now try the same program, but with the name */ + + attr.name = "test"; + ret = bpf_load_program_xattr(&attr, NULL, 0); + if (ret >= 0) { + obj->caps.name = 1; + close(ret); + } + + return 0; +} + +static int +bpf_object__probe_global_data(struct bpf_object *obj) +{ + struct bpf_load_program_attr prg_attr; + struct bpf_create_map_attr map_attr; + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int ret, map; + + memset(&map_attr, 0, sizeof(map_attr)); + map_attr.map_type = BPF_MAP_TYPE_ARRAY; + map_attr.key_size = sizeof(int); + map_attr.value_size = 32; + map_attr.max_entries = 1; + + map = bpf_create_map_xattr(&map_attr); + if (map < 0) { + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); + pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", + __func__, cp, errno); + return -errno; + } + + insns[0].imm = map; + + memset(&prg_attr, 0, sizeof(prg_attr)); + prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + prg_attr.insns = insns; + prg_attr.insns_cnt = ARRAY_SIZE(insns); + prg_attr.license = "GPL"; + + ret = bpf_load_program_xattr(&prg_attr, NULL, 0); + if (ret >= 0) { + obj->caps.global_data = 1; + close(ret); + } + + close(map); + return 0; +} + +static int bpf_object__probe_btf_func(struct bpf_object *obj) +{ + static const char strs[] = "\0int\0x\0a"; + /* void x(int a) {} */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* FUNC_PROTO */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), + BTF_PARAM_ENC(7, 1), + /* FUNC x */ /* [3] */ + BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2), + }; + int btf_fd; + + btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs)); + if (btf_fd >= 0) { + obj->caps.btf_func = 1; + close(btf_fd); + return 1; + } + + return 0; +} + +static int bpf_object__probe_btf_datasec(struct bpf_object *obj) +{ + static const char strs[] = "\0x\0.data"; + /* static int a; */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* DATASEC val */ /* [3] */ + BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + }; + int btf_fd; + + btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs)); + if (btf_fd >= 0) { + obj->caps.btf_datasec = 1; + close(btf_fd); + return 1; + } + + return 0; +} + +static int bpf_object__probe_array_mmap(struct bpf_object *obj) +{ + struct bpf_create_map_attr attr = { + .map_type = BPF_MAP_TYPE_ARRAY, + .map_flags = BPF_F_MMAPABLE, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 1, + }; + int fd; + + fd = bpf_create_map_xattr(&attr); + if (fd >= 0) { + obj->caps.array_mmap = 1; + close(fd); + return 1; + } + + return 0; +} + +static int +bpf_object__probe_caps(struct bpf_object *obj) +{ + int (*probe_fn[])(struct bpf_object *obj) = { + bpf_object__probe_name, + bpf_object__probe_global_data, + bpf_object__probe_btf_func, + bpf_object__probe_btf_datasec, + bpf_object__probe_array_mmap, + }; + int i, ret; + + for (i = 0; i < ARRAY_SIZE(probe_fn); i++) { + ret = probe_fn[i](obj); + if (ret < 0) + pr_debug("Probe #%d failed with %d.\n", i, ret); + } + + return 0; +} + +static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) +{ + struct bpf_map_info map_info = {}; + char msg[STRERR_BUFSIZE]; + __u32 map_info_len; + + map_info_len = sizeof(map_info); + + if (bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len)) { + pr_warn("failed to get map info for map FD %d: %s\n", + map_fd, libbpf_strerror_r(errno, msg, sizeof(msg))); + return false; + } + + return (map_info.type == map->def.type && + map_info.key_size == map->def.key_size && + map_info.value_size == map->def.value_size && + map_info.max_entries == map->def.max_entries && + map_info.map_flags == map->def.map_flags); +} + +static int +bpf_object__reuse_map(struct bpf_map *map) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + int err, pin_fd; + + pin_fd = bpf_obj_get(map->pin_path); + if (pin_fd < 0) { + err = -errno; + if (err == -ENOENT) { + pr_debug("found no pinned map to reuse at '%s'\n", + map->pin_path); + return 0; + } + + cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); + pr_warn("couldn't retrieve pinned map '%s': %s\n", + map->pin_path, cp); + return err; + } + + if (!map_is_reuse_compat(map, pin_fd)) { + pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n", + map->pin_path); + close(pin_fd); + return -EINVAL; + } + + err = bpf_map__reuse_fd(map, pin_fd); + if (err) { + close(pin_fd); + return err; + } + map->pinned = true; + pr_debug("reused pinned map at '%s'\n", map->pin_path); + + return 0; +} + +static int +bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + int err, zero = 0; + __u8 *data; + + /* Nothing to do here since kernel already zero-initializes .bss map. */ + if (map->libbpf_type == LIBBPF_MAP_BSS) + return 0; + + data = map->libbpf_type == LIBBPF_MAP_DATA ? + obj->sections.data : obj->sections.rodata; + + err = bpf_map_update_elem(map->fd, &zero, data, 0); + /* Freeze .rodata map as read-only from syscall side. */ + if (!err && map->libbpf_type == LIBBPF_MAP_RODATA) { + err = bpf_map_freeze(map->fd); + if (err) { + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); + pr_warn("Error freezing map(%s) as read-only: %s\n", + map->name, cp); + err = 0; + } + } + return err; +} + +static int +bpf_object__create_maps(struct bpf_object *obj) +{ + struct bpf_create_map_attr create_attr = {}; + int nr_cpus = 0; + unsigned int i; + int err; + + for (i = 0; i < obj->nr_maps; i++) { + struct bpf_map *map = &obj->maps[i]; + struct bpf_map_def *def = &map->def; + char *cp, errmsg[STRERR_BUFSIZE]; + int *pfd = &map->fd; + + if (map->pin_path) { + err = bpf_object__reuse_map(map); + if (err) { + pr_warn("error reusing pinned map %s\n", + map->name); + return err; + } + } + + if (map->fd >= 0) { + pr_debug("skip map create (preset) %s: fd=%d\n", + map->name, map->fd); + continue; + } + + if (obj->caps.name) + create_attr.name = map->name; + create_attr.map_ifindex = map->map_ifindex; + create_attr.map_type = def->type; + create_attr.map_flags = def->map_flags; + create_attr.key_size = def->key_size; + create_attr.value_size = def->value_size; + if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && + !def->max_entries) { + if (!nr_cpus) + nr_cpus = libbpf_num_possible_cpus(); + if (nr_cpus < 0) { + pr_warn("failed to determine number of system CPUs: %d\n", + nr_cpus); + err = nr_cpus; + goto err_out; + } + pr_debug("map '%s': setting size to %d\n", + map->name, nr_cpus); + create_attr.max_entries = nr_cpus; + } else { + create_attr.max_entries = def->max_entries; + } + create_attr.btf_fd = 0; + create_attr.btf_key_type_id = 0; + create_attr.btf_value_type_id = 0; + if (bpf_map_type__is_map_in_map(def->type) && + map->inner_map_fd >= 0) + create_attr.inner_map_fd = map->inner_map_fd; + + if (obj->btf && !bpf_map_find_btf_info(obj, map)) { + create_attr.btf_fd = btf__fd(obj->btf); + create_attr.btf_key_type_id = map->btf_key_type_id; + create_attr.btf_value_type_id = map->btf_value_type_id; + } + + *pfd = bpf_create_map_xattr(&create_attr); + if (*pfd < 0 && (create_attr.btf_key_type_id || + create_attr.btf_value_type_id)) { + err = -errno; + cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); + pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n", + map->name, cp, err); + create_attr.btf_fd = 0; + create_attr.btf_key_type_id = 0; + create_attr.btf_value_type_id = 0; + map->btf_key_type_id = 0; + map->btf_value_type_id = 0; + *pfd = bpf_create_map_xattr(&create_attr); + } + + if (*pfd < 0) { + size_t j; + + err = -errno; +err_out: + cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); + pr_warn("failed to create map (name: '%s'): %s(%d)\n", + map->name, cp, err); + for (j = 0; j < i; j++) + zclose(obj->maps[j].fd); + return err; + } + + if (bpf_map__is_internal(map)) { + err = bpf_object__populate_internal_map(obj, map); + if (err < 0) { + zclose(*pfd); + goto err_out; + } + } + + if (map->pin_path && !map->pinned) { + err = bpf_map__pin(map, NULL); + if (err) { + pr_warn("failed to auto-pin map name '%s' at '%s'\n", + map->name, map->pin_path); + return err; + } + } + + pr_debug("created map %s: fd=%d\n", map->name, *pfd); + } + + return 0; +} + +static int +check_btf_ext_reloc_err(struct bpf_program *prog, int err, + void *btf_prog_info, const char *info_name) +{ + if (err != -ENOENT) { + pr_warn("Error in loading %s for sec %s.\n", + info_name, prog->section_name); + return err; + } + + /* err == -ENOENT (i.e. prog->section_name not found in btf_ext) */ + + if (btf_prog_info) { + /* + * Some info has already been found but has problem + * in the last btf_ext reloc. Must have to error out. + */ + pr_warn("Error in relocating %s for sec %s.\n", + info_name, prog->section_name); + return err; + } + + /* Have problem loading the very first info. Ignore the rest. */ + pr_warn("Cannot find %s for main program sec %s. Ignore all %s.\n", + info_name, prog->section_name, info_name); + return 0; +} + +static int +bpf_program_reloc_btf_ext(struct bpf_program *prog, struct bpf_object *obj, + const char *section_name, __u32 insn_offset) +{ + int err; + + if (!insn_offset || prog->func_info) { + /* + * !insn_offset => main program + * + * For sub prog, the main program's func_info has to + * be loaded first (i.e. prog->func_info != NULL) + */ + err = btf_ext__reloc_func_info(obj->btf, obj->btf_ext, + section_name, insn_offset, + &prog->func_info, + &prog->func_info_cnt); + if (err) + return check_btf_ext_reloc_err(prog, err, + prog->func_info, + "bpf_func_info"); + + prog->func_info_rec_size = btf_ext__func_info_rec_size(obj->btf_ext); + } + + if (!insn_offset || prog->line_info) { + err = btf_ext__reloc_line_info(obj->btf, obj->btf_ext, + section_name, insn_offset, + &prog->line_info, + &prog->line_info_cnt); + if (err) + return check_btf_ext_reloc_err(prog, err, + prog->line_info, + "bpf_line_info"); + + prog->line_info_rec_size = btf_ext__line_info_rec_size(obj->btf_ext); + } + + return 0; +} + +#define BPF_CORE_SPEC_MAX_LEN 64 + +/* represents BPF CO-RE field or array element accessor */ +struct bpf_core_accessor { + __u32 type_id; /* struct/union type or array element type */ + __u32 idx; /* field index or array index */ + const char *name; /* field name or NULL for array accessor */ +}; + +struct bpf_core_spec { + const struct btf *btf; + /* high-level spec: named fields and array indices only */ + struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN]; + /* high-level spec length */ + int len; + /* raw, low-level spec: 1-to-1 with accessor spec string */ + int raw_spec[BPF_CORE_SPEC_MAX_LEN]; + /* raw spec length */ + int raw_len; + /* field bit offset represented by spec */ + __u32 bit_offset; +}; + +static bool str_is_empty(const char *s) +{ + return !s || !s[0]; +} + +/* + * Turn bpf_field_reloc into a low- and high-level spec representation, + * validating correctness along the way, as well as calculating resulting + * field bit offset, specified by accessor string. Low-level spec captures + * every single level of nestedness, including traversing anonymous + * struct/union members. High-level one only captures semantically meaningful + * "turning points": named fields and array indicies. + * E.g., for this case: + * + * struct sample { + * int __unimportant; + * struct { + * int __1; + * int __2; + * int a[7]; + * }; + * }; + * + * struct sample *s = ...; + * + * int x = &s->a[3]; // access string = '0:1:2:3' + * + * Low-level spec has 1:1 mapping with each element of access string (it's + * just a parsed access string representation): [0, 1, 2, 3]. + * + * High-level spec will capture only 3 points: + * - intial zero-index access by pointer (&s->... is the same as &s[0]...); + * - field 'a' access (corresponds to '2' in low-level spec); + * - array element #3 access (corresponds to '3' in low-level spec). + * + */ +static int bpf_core_spec_parse(const struct btf *btf, + __u32 type_id, + const char *spec_str, + struct bpf_core_spec *spec) +{ + int access_idx, parsed_len, i; + const struct btf_type *t; + const char *name; + __u32 id; + __s64 sz; + + if (str_is_empty(spec_str) || *spec_str == ':') + return -EINVAL; + + memset(spec, 0, sizeof(*spec)); + spec->btf = btf; + + /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */ + while (*spec_str) { + if (*spec_str == ':') + ++spec_str; + if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1) + return -EINVAL; + if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) + return -E2BIG; + spec_str += parsed_len; + spec->raw_spec[spec->raw_len++] = access_idx; + } + + if (spec->raw_len == 0) + return -EINVAL; + + /* first spec value is always reloc type array index */ + t = skip_mods_and_typedefs(btf, type_id, &id); + if (!t) + return -EINVAL; + + access_idx = spec->raw_spec[0]; + spec->spec[0].type_id = id; + spec->spec[0].idx = access_idx; + spec->len++; + + sz = btf__resolve_size(btf, id); + if (sz < 0) + return sz; + spec->bit_offset = access_idx * sz * 8; + + for (i = 1; i < spec->raw_len; i++) { + t = skip_mods_and_typedefs(btf, id, &id); + if (!t) + return -EINVAL; + + access_idx = spec->raw_spec[i]; + + if (btf_is_composite(t)) { + const struct btf_member *m; + __u32 bit_offset; + + if (access_idx >= btf_vlen(t)) + return -EINVAL; + + bit_offset = btf_member_bit_offset(t, access_idx); + spec->bit_offset += bit_offset; + + m = btf_members(t) + access_idx; + if (m->name_off) { + name = btf__name_by_offset(btf, m->name_off); + if (str_is_empty(name)) + return -EINVAL; + + spec->spec[spec->len].type_id = id; + spec->spec[spec->len].idx = access_idx; + spec->spec[spec->len].name = name; + spec->len++; + } + + id = m->type; + } else if (btf_is_array(t)) { + const struct btf_array *a = btf_array(t); + + t = skip_mods_and_typedefs(btf, a->type, &id); + if (!t || access_idx >= a->nelems) + return -EINVAL; + + spec->spec[spec->len].type_id = id; + spec->spec[spec->len].idx = access_idx; + spec->len++; + + sz = btf__resolve_size(btf, id); + if (sz < 0) + return sz; + spec->bit_offset += access_idx * sz * 8; + } else { + pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %d\n", + type_id, spec_str, i, id, btf_kind(t)); + return -EINVAL; + } + } + + return 0; +} + +static bool bpf_core_is_flavor_sep(const char *s) +{ + /* check X___Y name pattern, where X and Y are not underscores */ + return s[0] != '_' && /* X */ + s[1] == '_' && s[2] == '_' && s[3] == '_' && /* ___ */ + s[4] != '_'; /* Y */ +} + +/* Given 'some_struct_name___with_flavor' return the length of a name prefix + * before last triple underscore. Struct name part after last triple + * underscore is ignored by BPF CO-RE relocation during relocation matching. + */ +static size_t bpf_core_essential_name_len(const char *name) +{ + size_t n = strlen(name); + int i; + + for (i = n - 5; i >= 0; i--) { + if (bpf_core_is_flavor_sep(name + i)) + return i + 1; + } + return n; +} + +/* dynamically sized list of type IDs */ +struct ids_vec { + __u32 *data; + int len; +}; + +static void bpf_core_free_cands(struct ids_vec *cand_ids) +{ + free(cand_ids->data); + free(cand_ids); +} + +static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf, + __u32 local_type_id, + const struct btf *targ_btf) +{ + size_t local_essent_len, targ_essent_len; + const char *local_name, *targ_name; + const struct btf_type *t; + struct ids_vec *cand_ids; + __u32 *new_ids; + int i, err, n; + + t = btf__type_by_id(local_btf, local_type_id); + if (!t) + return ERR_PTR(-EINVAL); + + local_name = btf__name_by_offset(local_btf, t->name_off); + if (str_is_empty(local_name)) + return ERR_PTR(-EINVAL); + local_essent_len = bpf_core_essential_name_len(local_name); + + cand_ids = calloc(1, sizeof(*cand_ids)); + if (!cand_ids) + return ERR_PTR(-ENOMEM); + + n = btf__get_nr_types(targ_btf); + for (i = 1; i <= n; i++) { + t = btf__type_by_id(targ_btf, i); + targ_name = btf__name_by_offset(targ_btf, t->name_off); + if (str_is_empty(targ_name)) + continue; + + targ_essent_len = bpf_core_essential_name_len(targ_name); + if (targ_essent_len != local_essent_len) + continue; + + if (strncmp(local_name, targ_name, local_essent_len) == 0) { + pr_debug("[%d] %s: found candidate [%d] %s\n", + local_type_id, local_name, i, targ_name); + new_ids = realloc(cand_ids->data, cand_ids->len + 1); + if (!new_ids) { + err = -ENOMEM; + goto err_out; + } + cand_ids->data = new_ids; + cand_ids->data[cand_ids->len++] = i; + } + } + return cand_ids; +err_out: + bpf_core_free_cands(cand_ids); + return ERR_PTR(err); +} + +/* Check two types for compatibility, skipping const/volatile/restrict and + * typedefs, to ensure we are relocating compatible entities: + * - any two STRUCTs/UNIONs are compatible and can be mixed; + * - any two FWDs are compatible, if their names match (modulo flavor suffix); + * - any two PTRs are always compatible; + * - for ENUMs, names should be the same (ignoring flavor suffix) or at + * least one of enums should be anonymous; + * - for ENUMs, check sizes, names are ignored; + * - for INT, size and signedness are ignored; + * - for ARRAY, dimensionality is ignored, element types are checked for + * compatibility recursively; + * - everything else shouldn't be ever a target of relocation. + * These rules are not set in stone and probably will be adjusted as we get + * more experience with using BPF CO-RE relocations. + */ +static int bpf_core_fields_are_compat(const struct btf *local_btf, + __u32 local_id, + const struct btf *targ_btf, + __u32 targ_id) +{ + const struct btf_type *local_type, *targ_type; + +recur: + local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id); + targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); + if (!local_type || !targ_type) + return -EINVAL; + + if (btf_is_composite(local_type) && btf_is_composite(targ_type)) + return 1; + if (btf_kind(local_type) != btf_kind(targ_type)) + return 0; + + switch (btf_kind(local_type)) { + case BTF_KIND_PTR: + return 1; + case BTF_KIND_FWD: + case BTF_KIND_ENUM: { + const char *local_name, *targ_name; + size_t local_len, targ_len; + + local_name = btf__name_by_offset(local_btf, + local_type->name_off); + targ_name = btf__name_by_offset(targ_btf, targ_type->name_off); + local_len = bpf_core_essential_name_len(local_name); + targ_len = bpf_core_essential_name_len(targ_name); + /* one of them is anonymous or both w/ same flavor-less names */ + return local_len == 0 || targ_len == 0 || + (local_len == targ_len && + strncmp(local_name, targ_name, local_len) == 0); + } + case BTF_KIND_INT: + /* just reject deprecated bitfield-like integers; all other + * integers are by default compatible between each other + */ + return btf_int_offset(local_type) == 0 && + btf_int_offset(targ_type) == 0; + case BTF_KIND_ARRAY: + local_id = btf_array(local_type)->type; + targ_id = btf_array(targ_type)->type; + goto recur; + default: + pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n", + btf_kind(local_type), local_id, targ_id); + return 0; + } +} + +/* + * Given single high-level named field accessor in local type, find + * corresponding high-level accessor for a target type. Along the way, + * maintain low-level spec for target as well. Also keep updating target + * bit offset. + * + * Searching is performed through recursive exhaustive enumeration of all + * fields of a struct/union. If there are any anonymous (embedded) + * structs/unions, they are recursively searched as well. If field with + * desired name is found, check compatibility between local and target types, + * before returning result. + * + * 1 is returned, if field is found. + * 0 is returned if no compatible field is found. + * <0 is returned on error. + */ +static int bpf_core_match_member(const struct btf *local_btf, + const struct bpf_core_accessor *local_acc, + const struct btf *targ_btf, + __u32 targ_id, + struct bpf_core_spec *spec, + __u32 *next_targ_id) +{ + const struct btf_type *local_type, *targ_type; + const struct btf_member *local_member, *m; + const char *local_name, *targ_name; + __u32 local_id; + int i, n, found; + + targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); + if (!targ_type) + return -EINVAL; + if (!btf_is_composite(targ_type)) + return 0; + + local_id = local_acc->type_id; + local_type = btf__type_by_id(local_btf, local_id); + local_member = btf_members(local_type) + local_acc->idx; + local_name = btf__name_by_offset(local_btf, local_member->name_off); + + n = btf_vlen(targ_type); + m = btf_members(targ_type); + for (i = 0; i < n; i++, m++) { + __u32 bit_offset; + + bit_offset = btf_member_bit_offset(targ_type, i); + + /* too deep struct/union/array nesting */ + if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) + return -E2BIG; + + /* speculate this member will be the good one */ + spec->bit_offset += bit_offset; + spec->raw_spec[spec->raw_len++] = i; + + targ_name = btf__name_by_offset(targ_btf, m->name_off); + if (str_is_empty(targ_name)) { + /* embedded struct/union, we need to go deeper */ + found = bpf_core_match_member(local_btf, local_acc, + targ_btf, m->type, + spec, next_targ_id); + if (found) /* either found or error */ + return found; + } else if (strcmp(local_name, targ_name) == 0) { + /* matching named field */ + struct bpf_core_accessor *targ_acc; + + targ_acc = &spec->spec[spec->len++]; + targ_acc->type_id = targ_id; + targ_acc->idx = i; + targ_acc->name = targ_name; + + *next_targ_id = m->type; + found = bpf_core_fields_are_compat(local_btf, + local_member->type, + targ_btf, m->type); + if (!found) + spec->len--; /* pop accessor */ + return found; + } + /* member turned out not to be what we looked for */ + spec->bit_offset -= bit_offset; + spec->raw_len--; + } + + return 0; +} + +/* + * Try to match local spec to a target type and, if successful, produce full + * target spec (high-level, low-level + bit offset). + */ +static int bpf_core_spec_match(struct bpf_core_spec *local_spec, + const struct btf *targ_btf, __u32 targ_id, + struct bpf_core_spec *targ_spec) +{ + const struct btf_type *targ_type; + const struct bpf_core_accessor *local_acc; + struct bpf_core_accessor *targ_acc; + int i, sz, matched; + + memset(targ_spec, 0, sizeof(*targ_spec)); + targ_spec->btf = targ_btf; + + local_acc = &local_spec->spec[0]; + targ_acc = &targ_spec->spec[0]; + + for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) { + targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, + &targ_id); + if (!targ_type) + return -EINVAL; + + if (local_acc->name) { + matched = bpf_core_match_member(local_spec->btf, + local_acc, + targ_btf, targ_id, + targ_spec, &targ_id); + if (matched <= 0) + return matched; + } else { + /* for i=0, targ_id is already treated as array element + * type (because it's the original struct), for others + * we should find array element type first + */ + if (i > 0) { + const struct btf_array *a; + + if (!btf_is_array(targ_type)) + return 0; + + a = btf_array(targ_type); + if (local_acc->idx >= a->nelems) + return 0; + if (!skip_mods_and_typedefs(targ_btf, a->type, + &targ_id)) + return -EINVAL; + } + + /* too deep struct/union/array nesting */ + if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN) + return -E2BIG; + + targ_acc->type_id = targ_id; + targ_acc->idx = local_acc->idx; + targ_acc->name = NULL; + targ_spec->len++; + targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx; + targ_spec->raw_len++; + + sz = btf__resolve_size(targ_btf, targ_id); + if (sz < 0) + return sz; + targ_spec->bit_offset += local_acc->idx * sz * 8; + } + } + + return 1; +} + +static int bpf_core_calc_field_relo(const struct bpf_program *prog, + const struct bpf_field_reloc *relo, + const struct bpf_core_spec *spec, + __u32 *val, bool *validate) +{ + const struct bpf_core_accessor *acc = &spec->spec[spec->len - 1]; + const struct btf_type *t = btf__type_by_id(spec->btf, acc->type_id); + __u32 byte_off, byte_sz, bit_off, bit_sz; + const struct btf_member *m; + const struct btf_type *mt; + bool bitfield; + __s64 sz; + + /* a[n] accessor needs special handling */ + if (!acc->name) { + if (relo->kind == BPF_FIELD_BYTE_OFFSET) { + *val = spec->bit_offset / 8; + } else if (relo->kind == BPF_FIELD_BYTE_SIZE) { + sz = btf__resolve_size(spec->btf, acc->type_id); + if (sz < 0) + return -EINVAL; + *val = sz; + } else { + pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n", + bpf_program__title(prog, false), + relo->kind, relo->insn_off / 8); + return -EINVAL; + } + if (validate) + *validate = true; + return 0; + } + + m = btf_members(t) + acc->idx; + mt = skip_mods_and_typedefs(spec->btf, m->type, NULL); + bit_off = spec->bit_offset; + bit_sz = btf_member_bitfield_size(t, acc->idx); + + bitfield = bit_sz > 0; + if (bitfield) { + byte_sz = mt->size; + byte_off = bit_off / 8 / byte_sz * byte_sz; + /* figure out smallest int size necessary for bitfield load */ + while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) { + if (byte_sz >= 8) { + /* bitfield can't be read with 64-bit read */ + pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n", + bpf_program__title(prog, false), + relo->kind, relo->insn_off / 8); + return -E2BIG; + } + byte_sz *= 2; + byte_off = bit_off / 8 / byte_sz * byte_sz; + } + } else { + sz = btf__resolve_size(spec->btf, m->type); + if (sz < 0) + return -EINVAL; + byte_sz = sz; + byte_off = spec->bit_offset / 8; + bit_sz = byte_sz * 8; + } + + /* for bitfields, all the relocatable aspects are ambiguous and we + * might disagree with compiler, so turn off validation of expected + * value, except for signedness + */ + if (validate) + *validate = !bitfield; + + switch (relo->kind) { + case BPF_FIELD_BYTE_OFFSET: + *val = byte_off; + break; + case BPF_FIELD_BYTE_SIZE: + *val = byte_sz; + break; + case BPF_FIELD_SIGNED: + /* enums will be assumed unsigned */ + *val = btf_is_enum(mt) || + (btf_int_encoding(mt) & BTF_INT_SIGNED); + if (validate) + *validate = true; /* signedness is never ambiguous */ + break; + case BPF_FIELD_LSHIFT_U64: +#if __BYTE_ORDER == __LITTLE_ENDIAN + *val = 64 - (bit_off + bit_sz - byte_off * 8); +#else + *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8); +#endif + break; + case BPF_FIELD_RSHIFT_U64: + *val = 64 - bit_sz; + if (validate) + *validate = true; /* right shift is never ambiguous */ + break; + case BPF_FIELD_EXISTS: + default: + pr_warn("prog '%s': unknown relo %d at insn #%d\n", + bpf_program__title(prog, false), + relo->kind, relo->insn_off / 8); + return -EINVAL; + } + + return 0; +} + +/* + * Patch relocatable BPF instruction. + * + * Patched value is determined by relocation kind and target specification. + * For field existence relocation target spec will be NULL if field is not + * found. + * Expected insn->imm value is determined using relocation kind and local + * spec, and is checked before patching instruction. If actual insn->imm value + * is wrong, bail out with error. + * + * Currently three kinds of BPF instructions are supported: + * 1. rX = <imm> (assignment with immediate operand); + * 2. rX += <imm> (arithmetic operations with immediate operand); + */ +static int bpf_core_reloc_insn(struct bpf_program *prog, + const struct bpf_field_reloc *relo, + const struct bpf_core_spec *local_spec, + const struct bpf_core_spec *targ_spec) +{ + bool failed = false, validate = true; + __u32 orig_val, new_val; + struct bpf_insn *insn; + int insn_idx, err; + __u8 class; + + if (relo->insn_off % sizeof(struct bpf_insn)) + return -EINVAL; + insn_idx = relo->insn_off / sizeof(struct bpf_insn); + + if (relo->kind == BPF_FIELD_EXISTS) { + orig_val = 1; /* can't generate EXISTS relo w/o local field */ + new_val = targ_spec ? 1 : 0; + } else if (!targ_spec) { + failed = true; + new_val = (__u32)-1; + } else { + err = bpf_core_calc_field_relo(prog, relo, local_spec, + &orig_val, &validate); + if (err) + return err; + err = bpf_core_calc_field_relo(prog, relo, targ_spec, + &new_val, NULL); + if (err) + return err; + } + + insn = &prog->insns[insn_idx]; + class = BPF_CLASS(insn->code); + + if (class == BPF_ALU || class == BPF_ALU64) { + if (BPF_SRC(insn->code) != BPF_K) + return -EINVAL; + if (!failed && validate && insn->imm != orig_val) { + pr_warn("prog '%s': unexpected insn #%d value: got %u, exp %u -> %u\n", + bpf_program__title(prog, false), insn_idx, + insn->imm, orig_val, new_val); + return -EINVAL; + } + orig_val = insn->imm; + insn->imm = new_val; + pr_debug("prog '%s': patched insn #%d (ALU/ALU64)%s imm %u -> %u\n", + bpf_program__title(prog, false), insn_idx, + failed ? " w/ failed reloc" : "", orig_val, new_val); + } else { + pr_warn("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n", + bpf_program__title(prog, false), + insn_idx, insn->code, insn->src_reg, insn->dst_reg, + insn->off, insn->imm); + return -EINVAL; + } + + return 0; +} + +static struct btf *btf_load_raw(const char *path) +{ + struct btf *btf; + size_t read_cnt; + struct stat st; + void *data; + FILE *f; + + if (stat(path, &st)) + return ERR_PTR(-errno); + + data = malloc(st.st_size); + if (!data) + return ERR_PTR(-ENOMEM); + + f = fopen(path, "rb"); + if (!f) { + btf = ERR_PTR(-errno); + goto cleanup; + } + + read_cnt = fread(data, 1, st.st_size, f); + fclose(f); + if (read_cnt < st.st_size) { + btf = ERR_PTR(-EBADF); + goto cleanup; + } + + btf = btf__new(data, read_cnt); + +cleanup: + free(data); + return btf; +} + +/* + * Probe few well-known locations for vmlinux kernel image and try to load BTF + * data out of it to use for target BTF. + */ +static struct btf *bpf_core_find_kernel_btf(void) +{ + struct { + const char *path_fmt; + bool raw_btf; + } locations[] = { + /* try canonical vmlinux BTF through sysfs first */ + { "/sys/kernel/btf/vmlinux", true /* raw BTF */ }, + /* fall back to trying to find vmlinux ELF on disk otherwise */ + { "/boot/vmlinux-%1$s" }, + { "/lib/modules/%1$s/vmlinux-%1$s" }, + { "/lib/modules/%1$s/build/vmlinux" }, + { "/usr/lib/modules/%1$s/kernel/vmlinux" }, + { "/usr/lib/debug/boot/vmlinux-%1$s" }, + { "/usr/lib/debug/boot/vmlinux-%1$s.debug" }, + { "/usr/lib/debug/lib/modules/%1$s/vmlinux" }, + }; + char path[PATH_MAX + 1]; + struct utsname buf; + struct btf *btf; + int i; + + uname(&buf); + + for (i = 0; i < ARRAY_SIZE(locations); i++) { + snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release); + + if (access(path, R_OK)) + continue; + + if (locations[i].raw_btf) + btf = btf_load_raw(path); + else + btf = btf__parse_elf(path, NULL); + + pr_debug("loading kernel BTF '%s': %ld\n", + path, IS_ERR(btf) ? PTR_ERR(btf) : 0); + if (IS_ERR(btf)) + continue; + + return btf; + } + + pr_warn("failed to find valid kernel BTF\n"); + return ERR_PTR(-ESRCH); +} + +/* Output spec definition in the format: + * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>, + * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b + */ +static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) +{ + const struct btf_type *t; + const char *s; + __u32 type_id; + int i; + + type_id = spec->spec[0].type_id; + t = btf__type_by_id(spec->btf, type_id); + s = btf__name_by_offset(spec->btf, t->name_off); + libbpf_print(level, "[%u] %s + ", type_id, s); + + for (i = 0; i < spec->raw_len; i++) + libbpf_print(level, "%d%s", spec->raw_spec[i], + i == spec->raw_len - 1 ? " => " : ":"); + + libbpf_print(level, "%u.%u @ &x", + spec->bit_offset / 8, spec->bit_offset % 8); + + for (i = 0; i < spec->len; i++) { + if (spec->spec[i].name) + libbpf_print(level, ".%s", spec->spec[i].name); + else + libbpf_print(level, "[%u]", spec->spec[i].idx); + } + +} + +static size_t bpf_core_hash_fn(const void *key, void *ctx) +{ + return (size_t)key; +} + +static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx) +{ + return k1 == k2; +} + +static void *u32_as_hash_key(__u32 x) +{ + return (void *)(uintptr_t)x; +} + +/* + * CO-RE relocate single instruction. + * + * The outline and important points of the algorithm: + * 1. For given local type, find corresponding candidate target types. + * Candidate type is a type with the same "essential" name, ignoring + * everything after last triple underscore (___). E.g., `sample`, + * `sample___flavor_one`, `sample___flavor_another_one`, are all candidates + * for each other. Names with triple underscore are referred to as + * "flavors" and are useful, among other things, to allow to + * specify/support incompatible variations of the same kernel struct, which + * might differ between different kernel versions and/or build + * configurations. + * + * N.B. Struct "flavors" could be generated by bpftool's BTF-to-C + * converter, when deduplicated BTF of a kernel still contains more than + * one different types with the same name. In that case, ___2, ___3, etc + * are appended starting from second name conflict. But start flavors are + * also useful to be defined "locally", in BPF program, to extract same + * data from incompatible changes between different kernel + * versions/configurations. For instance, to handle field renames between + * kernel versions, one can use two flavors of the struct name with the + * same common name and use conditional relocations to extract that field, + * depending on target kernel version. + * 2. For each candidate type, try to match local specification to this + * candidate target type. Matching involves finding corresponding + * high-level spec accessors, meaning that all named fields should match, + * as well as all array accesses should be within the actual bounds. Also, + * types should be compatible (see bpf_core_fields_are_compat for details). + * 3. It is supported and expected that there might be multiple flavors + * matching the spec. As long as all the specs resolve to the same set of + * offsets across all candidates, there is no error. If there is any + * ambiguity, CO-RE relocation will fail. This is necessary to accomodate + * imprefection of BTF deduplication, which can cause slight duplication of + * the same BTF type, if some directly or indirectly referenced (by + * pointer) type gets resolved to different actual types in different + * object files. If such situation occurs, deduplicated BTF will end up + * with two (or more) structurally identical types, which differ only in + * types they refer to through pointer. This should be OK in most cases and + * is not an error. + * 4. Candidate types search is performed by linearly scanning through all + * types in target BTF. It is anticipated that this is overall more + * efficient memory-wise and not significantly worse (if not better) + * CPU-wise compared to prebuilding a map from all local type names to + * a list of candidate type names. It's also sped up by caching resolved + * list of matching candidates per each local "root" type ID, that has at + * least one bpf_field_reloc associated with it. This list is shared + * between multiple relocations for the same type ID and is updated as some + * of the candidates are pruned due to structural incompatibility. + */ +static int bpf_core_reloc_field(struct bpf_program *prog, + const struct bpf_field_reloc *relo, + int relo_idx, + const struct btf *local_btf, + const struct btf *targ_btf, + struct hashmap *cand_cache) +{ + const char *prog_name = bpf_program__title(prog, false); + struct bpf_core_spec local_spec, cand_spec, targ_spec; + const void *type_key = u32_as_hash_key(relo->type_id); + const struct btf_type *local_type, *cand_type; + const char *local_name, *cand_name; + struct ids_vec *cand_ids; + __u32 local_id, cand_id; + const char *spec_str; + int i, j, err; + + local_id = relo->type_id; + local_type = btf__type_by_id(local_btf, local_id); + if (!local_type) + return -EINVAL; + + local_name = btf__name_by_offset(local_btf, local_type->name_off); + if (str_is_empty(local_name)) + return -EINVAL; + + spec_str = btf__name_by_offset(local_btf, relo->access_str_off); + if (str_is_empty(spec_str)) + return -EINVAL; + + err = bpf_core_spec_parse(local_btf, local_id, spec_str, &local_spec); + if (err) { + pr_warn("prog '%s': relo #%d: parsing [%d] %s + %s failed: %d\n", + prog_name, relo_idx, local_id, local_name, spec_str, + err); + return -EINVAL; + } + + pr_debug("prog '%s': relo #%d: kind %d, spec is ", prog_name, relo_idx, + relo->kind); + bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec); + libbpf_print(LIBBPF_DEBUG, "\n"); + + if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) { + cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf); + if (IS_ERR(cand_ids)) { + pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s: %ld", + prog_name, relo_idx, local_id, local_name, + PTR_ERR(cand_ids)); + return PTR_ERR(cand_ids); + } + err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL); + if (err) { + bpf_core_free_cands(cand_ids); + return err; + } + } + + for (i = 0, j = 0; i < cand_ids->len; i++) { + cand_id = cand_ids->data[i]; + cand_type = btf__type_by_id(targ_btf, cand_id); + cand_name = btf__name_by_offset(targ_btf, cand_type->name_off); + + err = bpf_core_spec_match(&local_spec, targ_btf, + cand_id, &cand_spec); + pr_debug("prog '%s': relo #%d: matching candidate #%d %s against spec ", + prog_name, relo_idx, i, cand_name); + bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec); + libbpf_print(LIBBPF_DEBUG, ": %d\n", err); + if (err < 0) { + pr_warn("prog '%s': relo #%d: matching error: %d\n", + prog_name, relo_idx, err); + return err; + } + if (err == 0) + continue; + + if (j == 0) { + targ_spec = cand_spec; + } else if (cand_spec.bit_offset != targ_spec.bit_offset) { + /* if there are many candidates, they should all + * resolve to the same bit offset + */ + pr_warn("prog '%s': relo #%d: offset ambiguity: %u != %u\n", + prog_name, relo_idx, cand_spec.bit_offset, + targ_spec.bit_offset); + return -EINVAL; + } + + cand_ids->data[j++] = cand_spec.spec[0].type_id; + } + + /* + * For BPF_FIELD_EXISTS relo or when relaxed CO-RE reloc mode is + * requested, it's expected that we might not find any candidates. + * In this case, if field wasn't found in any candidate, the list of + * candidates shouldn't change at all, we'll just handle relocating + * appropriately, depending on relo's kind. + */ + if (j > 0) + cand_ids->len = j; + + if (j == 0 && !prog->obj->relaxed_core_relocs && + relo->kind != BPF_FIELD_EXISTS) { + pr_warn("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n", + prog_name, relo_idx, local_id, local_name, spec_str); + return -ESRCH; + } + + /* bpf_core_reloc_insn should know how to handle missing targ_spec */ + err = bpf_core_reloc_insn(prog, relo, &local_spec, + j ? &targ_spec : NULL); + if (err) { + pr_warn("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n", + prog_name, relo_idx, relo->insn_off, err); + return -EINVAL; + } + + return 0; +} + +static int +bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path) +{ + const struct btf_ext_info_sec *sec; + const struct bpf_field_reloc *rec; + const struct btf_ext_info *seg; + struct hashmap_entry *entry; + struct hashmap *cand_cache = NULL; + struct bpf_program *prog; + struct btf *targ_btf; + const char *sec_name; + int i, err = 0; + + if (targ_btf_path) + targ_btf = btf__parse_elf(targ_btf_path, NULL); + else + targ_btf = bpf_core_find_kernel_btf(); + if (IS_ERR(targ_btf)) { + pr_warn("failed to get target BTF: %ld\n", PTR_ERR(targ_btf)); + return PTR_ERR(targ_btf); + } + + cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL); + if (IS_ERR(cand_cache)) { + err = PTR_ERR(cand_cache); + goto out; + } + + seg = &obj->btf_ext->field_reloc_info; + for_each_btf_ext_sec(seg, sec) { + sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); + if (str_is_empty(sec_name)) { + err = -EINVAL; + goto out; + } + prog = bpf_object__find_program_by_title(obj, sec_name); + if (!prog) { + pr_warn("failed to find program '%s' for CO-RE offset relocation\n", + sec_name); + err = -EINVAL; + goto out; + } + + pr_debug("prog '%s': performing %d CO-RE offset relocs\n", + sec_name, sec->num_info); + + for_each_btf_ext_rec(seg, sec, i, rec) { + err = bpf_core_reloc_field(prog, rec, i, obj->btf, + targ_btf, cand_cache); + if (err) { + pr_warn("prog '%s': relo #%d: failed to relocate: %d\n", + sec_name, i, err); + goto out; + } + } + } + +out: + btf__free(targ_btf); + if (!IS_ERR_OR_NULL(cand_cache)) { + hashmap__for_each_entry(cand_cache, entry, i) { + bpf_core_free_cands(entry->value); + } + hashmap__free(cand_cache); + } + return err; +} + +static int +bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) +{ + int err = 0; + + if (obj->btf_ext->field_reloc_info.len) + err = bpf_core_reloc_fields(obj, targ_btf_path); + + return err; +} + +static int +bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, + struct reloc_desc *relo) +{ + struct bpf_insn *insn, *new_insn; + struct bpf_program *text; + size_t new_cnt; + int err; + + if (relo->type != RELO_CALL) + return -LIBBPF_ERRNO__RELOC; + + if (prog->idx == obj->efile.text_shndx) { + pr_warn("relo in .text insn %d into off %d (insn #%d)\n", + relo->insn_idx, relo->sym_off, relo->sym_off / 8); + return -LIBBPF_ERRNO__RELOC; + } + + if (prog->main_prog_cnt == 0) { + text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx); + if (!text) { + pr_warn("no .text section found yet relo into text exist\n"); + return -LIBBPF_ERRNO__RELOC; + } + new_cnt = prog->insns_cnt + text->insns_cnt; + new_insn = reallocarray(prog->insns, new_cnt, sizeof(*insn)); + if (!new_insn) { + pr_warn("oom in prog realloc\n"); + return -ENOMEM; + } + prog->insns = new_insn; + + if (obj->btf_ext) { + err = bpf_program_reloc_btf_ext(prog, obj, + text->section_name, + prog->insns_cnt); + if (err) + return err; + } + + memcpy(new_insn + prog->insns_cnt, text->insns, + text->insns_cnt * sizeof(*insn)); + prog->main_prog_cnt = prog->insns_cnt; + prog->insns_cnt = new_cnt; + pr_debug("added %zd insn from %s to prog %s\n", + text->insns_cnt, text->section_name, + prog->section_name); + } + insn = &prog->insns[relo->insn_idx]; + insn->imm += relo->sym_off / 8 + prog->main_prog_cnt - relo->insn_idx; + return 0; +} + +static int +bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) +{ + int i, err; + + if (!prog) + return 0; + + if (obj->btf_ext) { + err = bpf_program_reloc_btf_ext(prog, obj, + prog->section_name, 0); + if (err) + return err; + } + + if (!prog->reloc_desc) + return 0; + + for (i = 0; i < prog->nr_reloc; i++) { + struct reloc_desc *relo = &prog->reloc_desc[i]; + + if (relo->type == RELO_LD64 || relo->type == RELO_DATA) { + struct bpf_insn *insn = &prog->insns[relo->insn_idx]; + + if (relo->insn_idx + 1 >= (int)prog->insns_cnt) { + pr_warn("relocation out of range: '%s'\n", + prog->section_name); + return -LIBBPF_ERRNO__RELOC; + } + + if (relo->type != RELO_DATA) { + insn[0].src_reg = BPF_PSEUDO_MAP_FD; + } else { + insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; + insn[1].imm = insn[0].imm + relo->sym_off; + } + insn[0].imm = obj->maps[relo->map_idx].fd; + } else if (relo->type == RELO_CALL) { + err = bpf_program__reloc_text(prog, obj, relo); + if (err) + return err; + } + } + + zfree(&prog->reloc_desc); + prog->nr_reloc = 0; + return 0; +} + +static int +bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) +{ + struct bpf_program *prog; + size_t i; + int err; + + if (obj->btf_ext) { + err = bpf_object__relocate_core(obj, targ_btf_path); + if (err) { + pr_warn("failed to perform CO-RE relocations: %d\n", + err); + return err; + } + } + for (i = 0; i < obj->nr_programs; i++) { + prog = &obj->programs[i]; + + err = bpf_program__relocate(prog, obj); + if (err) { + pr_warn("failed to relocate '%s'\n", prog->section_name); + return err; + } + } + return 0; +} + +static int bpf_object__collect_reloc(struct bpf_object *obj) +{ + int i, err; + + if (!obj_elf_valid(obj)) { + pr_warn("Internal error: elf object is closed\n"); + return -LIBBPF_ERRNO__INTERNAL; + } + + for (i = 0; i < obj->efile.nr_reloc_sects; i++) { + GElf_Shdr *shdr = &obj->efile.reloc_sects[i].shdr; + Elf_Data *data = obj->efile.reloc_sects[i].data; + int idx = shdr->sh_info; + struct bpf_program *prog; + + if (shdr->sh_type != SHT_REL) { + pr_warn("internal error at %d\n", __LINE__); + return -LIBBPF_ERRNO__INTERNAL; + } + + prog = bpf_object__find_prog_by_idx(obj, idx); + if (!prog) { + pr_warn("relocation failed: no section(%d)\n", idx); + return -LIBBPF_ERRNO__RELOC; + } + + err = bpf_program__collect_reloc(prog, shdr, data, obj); + if (err) + return err; + } + return 0; +} + +static int +load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, + char *license, __u32 kern_version, int *pfd) +{ + struct bpf_load_program_attr load_attr; + char *cp, errmsg[STRERR_BUFSIZE]; + int log_buf_size = BPF_LOG_BUF_SIZE; + char *log_buf; + int btf_fd, ret; + + if (!insns || !insns_cnt) + return -EINVAL; + + memset(&load_attr, 0, sizeof(struct bpf_load_program_attr)); + load_attr.prog_type = prog->type; + load_attr.expected_attach_type = prog->expected_attach_type; + if (prog->caps->name) + load_attr.name = prog->name; + load_attr.insns = insns; + load_attr.insns_cnt = insns_cnt; + load_attr.license = license; + if (prog->type == BPF_PROG_TYPE_TRACING) { + load_attr.attach_prog_fd = prog->attach_prog_fd; + load_attr.attach_btf_id = prog->attach_btf_id; + } else { + load_attr.kern_version = kern_version; + load_attr.prog_ifindex = prog->prog_ifindex; + } + /* if .BTF.ext was loaded, kernel supports associated BTF for prog */ + if (prog->obj->btf_ext) + btf_fd = bpf_object__btf_fd(prog->obj); + else + btf_fd = -1; + load_attr.prog_btf_fd = btf_fd >= 0 ? btf_fd : 0; + load_attr.func_info = prog->func_info; + load_attr.func_info_rec_size = prog->func_info_rec_size; + load_attr.func_info_cnt = prog->func_info_cnt; + load_attr.line_info = prog->line_info; + load_attr.line_info_rec_size = prog->line_info_rec_size; + load_attr.line_info_cnt = prog->line_info_cnt; + load_attr.log_level = prog->log_level; + load_attr.prog_flags = prog->prog_flags; + +retry_load: + log_buf = malloc(log_buf_size); + if (!log_buf) + pr_warn("Alloc log buffer for bpf loader error, continue without log\n"); + + ret = bpf_load_program_xattr(&load_attr, log_buf, log_buf_size); + + if (ret >= 0) { + if (load_attr.log_level) + pr_debug("verifier log:\n%s", log_buf); + *pfd = ret; + ret = 0; + goto out; + } + + if (errno == ENOSPC) { + log_buf_size <<= 1; + free(log_buf); + goto retry_load; + } + ret = -errno; + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); + pr_warn("load bpf program failed: %s\n", cp); + + if (log_buf && log_buf[0] != '\0') { + ret = -LIBBPF_ERRNO__VERIFY; + pr_warn("-- BEGIN DUMP LOG ---\n"); + pr_warn("\n%s\n", log_buf); + pr_warn("-- END LOG --\n"); + } else if (load_attr.insns_cnt >= BPF_MAXINSNS) { + pr_warn("Program too large (%zu insns), at most %d insns\n", + load_attr.insns_cnt, BPF_MAXINSNS); + ret = -LIBBPF_ERRNO__PROG2BIG; + } else if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) { + /* Wrong program type? */ + int fd; + + load_attr.prog_type = BPF_PROG_TYPE_KPROBE; + load_attr.expected_attach_type = 0; + fd = bpf_load_program_xattr(&load_attr, NULL, 0); + if (fd >= 0) { + close(fd); + ret = -LIBBPF_ERRNO__PROGTYPE; + goto out; + } + } + +out: + free(log_buf); + return ret; +} + +int +bpf_program__load(struct bpf_program *prog, + char *license, __u32 kern_version) +{ + int err = 0, fd, i; + + if (prog->instances.nr < 0 || !prog->instances.fds) { + if (prog->preprocessor) { + pr_warn("Internal error: can't load program '%s'\n", + prog->section_name); + return -LIBBPF_ERRNO__INTERNAL; + } + + prog->instances.fds = malloc(sizeof(int)); + if (!prog->instances.fds) { + pr_warn("Not enough memory for BPF fds\n"); + return -ENOMEM; + } + prog->instances.nr = 1; + prog->instances.fds[0] = -1; + } + + if (!prog->preprocessor) { + if (prog->instances.nr != 1) { + pr_warn("Program '%s' is inconsistent: nr(%d) != 1\n", + prog->section_name, prog->instances.nr); + } + err = load_program(prog, prog->insns, prog->insns_cnt, + license, kern_version, &fd); + if (!err) + prog->instances.fds[0] = fd; + goto out; + } + + for (i = 0; i < prog->instances.nr; i++) { + struct bpf_prog_prep_result result; + bpf_program_prep_t preprocessor = prog->preprocessor; + + memset(&result, 0, sizeof(result)); + err = preprocessor(prog, i, prog->insns, + prog->insns_cnt, &result); + if (err) { + pr_warn("Preprocessing the %dth instance of program '%s' failed\n", + i, prog->section_name); + goto out; + } + + if (!result.new_insn_ptr || !result.new_insn_cnt) { + pr_debug("Skip loading the %dth instance of program '%s'\n", + i, prog->section_name); + prog->instances.fds[i] = -1; + if (result.pfd) + *result.pfd = -1; + continue; + } + + err = load_program(prog, result.new_insn_ptr, + result.new_insn_cnt, + license, kern_version, &fd); + + if (err) { + pr_warn("Loading the %dth instance of program '%s' failed\n", + i, prog->section_name); + goto out; + } + + if (result.pfd) + *result.pfd = fd; + prog->instances.fds[i] = fd; + } +out: + if (err) + pr_warn("failed to load program '%s'\n", prog->section_name); + zfree(&prog->insns); + prog->insns_cnt = 0; + return err; +} + +static bool bpf_program__is_function_storage(const struct bpf_program *prog, + const struct bpf_object *obj) +{ + return prog->idx == obj->efile.text_shndx && obj->has_pseudo_calls; +} + +static int +bpf_object__load_progs(struct bpf_object *obj, int log_level) +{ + size_t i; + int err; + + for (i = 0; i < obj->nr_programs; i++) { + if (bpf_program__is_function_storage(&obj->programs[i], obj)) + continue; + obj->programs[i].log_level |= log_level; + err = bpf_program__load(&obj->programs[i], + obj->license, + obj->kern_version); + if (err) + return err; + } + return 0; +} + +static int libbpf_find_attach_btf_id(const char *name, + enum bpf_attach_type attach_type, + __u32 attach_prog_fd); +static struct bpf_object * +__bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, + struct bpf_object_open_opts *opts) +{ + const char *pin_root_path; + struct bpf_program *prog; + struct bpf_object *obj; + const char *obj_name; + char tmp_name[64]; + bool relaxed_maps; + __u32 attach_prog_fd; + int err; + + if (elf_version(EV_CURRENT) == EV_NONE) { + pr_warn("failed to init libelf for %s\n", + path ? : "(mem buf)"); + return ERR_PTR(-LIBBPF_ERRNO__LIBELF); + } + + if (!OPTS_VALID(opts, bpf_object_open_opts)) + return ERR_PTR(-EINVAL); + + obj_name = OPTS_GET(opts, object_name, NULL); + if (obj_buf) { + if (!obj_name) { + snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx", + (unsigned long)obj_buf, + (unsigned long)obj_buf_sz); + obj_name = tmp_name; + } + path = obj_name; + pr_debug("loading object '%s' from buffer\n", obj_name); + } + + obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name); + if (IS_ERR(obj)) + return obj; + + obj->relaxed_core_relocs = OPTS_GET(opts, relaxed_core_relocs, false); + relaxed_maps = OPTS_GET(opts, relaxed_maps, false); + pin_root_path = OPTS_GET(opts, pin_root_path, NULL); + attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); + + CHECK_ERR(bpf_object__elf_init(obj), err, out); + CHECK_ERR(bpf_object__check_endianness(obj), err, out); + CHECK_ERR(bpf_object__probe_caps(obj), err, out); + CHECK_ERR(bpf_object__elf_collect(obj, relaxed_maps, pin_root_path), + err, out); + CHECK_ERR(bpf_object__collect_reloc(obj), err, out); + bpf_object__elf_finish(obj); + + bpf_object__for_each_program(prog, obj) { + enum bpf_prog_type prog_type; + enum bpf_attach_type attach_type; + + err = libbpf_prog_type_by_name(prog->section_name, &prog_type, + &attach_type); + if (err == -ESRCH) + /* couldn't guess, but user might manually specify */ + continue; + if (err) + goto out; + + bpf_program__set_type(prog, prog_type); + bpf_program__set_expected_attach_type(prog, attach_type); + if (prog_type == BPF_PROG_TYPE_TRACING) { + err = libbpf_find_attach_btf_id(prog->section_name, + attach_type, + attach_prog_fd); + if (err <= 0) + goto out; + prog->attach_btf_id = err; + prog->attach_prog_fd = attach_prog_fd; + } + } + + return obj; +out: + bpf_object__close(obj); + return ERR_PTR(err); +} + +static struct bpf_object * +__bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags) +{ + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, + .relaxed_maps = flags & MAPS_RELAX_COMPAT, + ); + + /* param validation */ + if (!attr->file) + return NULL; + + pr_debug("loading %s\n", attr->file); + return __bpf_object__open(attr->file, NULL, 0, &opts); +} + +struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr) +{ + return __bpf_object__open_xattr(attr, 0); +} + +struct bpf_object *bpf_object__open(const char *path) +{ + struct bpf_object_open_attr attr = { + .file = path, + .prog_type = BPF_PROG_TYPE_UNSPEC, + }; + + return bpf_object__open_xattr(&attr); +} + +struct bpf_object * +bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts) +{ + if (!path) + return ERR_PTR(-EINVAL); + + pr_debug("loading %s\n", path); + + return __bpf_object__open(path, NULL, 0, opts); +} + +struct bpf_object * +bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, + struct bpf_object_open_opts *opts) +{ + if (!obj_buf || obj_buf_sz == 0) + return ERR_PTR(-EINVAL); + + return __bpf_object__open(NULL, obj_buf, obj_buf_sz, opts); +} + +struct bpf_object * +bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz, + const char *name) +{ + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, + .object_name = name, + /* wrong default, but backwards-compatible */ + .relaxed_maps = true, + ); + + /* returning NULL is wrong, but backwards-compatible */ + if (!obj_buf || obj_buf_sz == 0) + return NULL; + + return bpf_object__open_mem(obj_buf, obj_buf_sz, &opts); +} + +int bpf_object__unload(struct bpf_object *obj) +{ + size_t i; + + if (!obj) + return -EINVAL; + + for (i = 0; i < obj->nr_maps; i++) + zclose(obj->maps[i].fd); + + for (i = 0; i < obj->nr_programs; i++) + bpf_program__unload(&obj->programs[i]); + + return 0; +} + +int bpf_object__load_xattr(struct bpf_object_load_attr *attr) +{ + struct bpf_object *obj; + int err, i; + + if (!attr) + return -EINVAL; + obj = attr->obj; + if (!obj) + return -EINVAL; + + if (obj->loaded) { + pr_warn("object should not be loaded twice\n"); + return -EINVAL; + } + + obj->loaded = true; + + CHECK_ERR(bpf_object__create_maps(obj), err, out); + CHECK_ERR(bpf_object__relocate(obj, attr->target_btf_path), err, out); + CHECK_ERR(bpf_object__load_progs(obj, attr->log_level), err, out); + + return 0; +out: + /* unpin any maps that were auto-pinned during load */ + for (i = 0; i < obj->nr_maps; i++) + if (obj->maps[i].pinned && !obj->maps[i].reused) + bpf_map__unpin(&obj->maps[i], NULL); + + bpf_object__unload(obj); + pr_warn("failed to load object '%s'\n", obj->path); + return err; +} + +int bpf_object__load(struct bpf_object *obj) +{ + struct bpf_object_load_attr attr = { + .obj = obj, + }; + + return bpf_object__load_xattr(&attr); +} + +static int make_parent_dir(const char *path) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + char *dname, *dir; + int err = 0; + + dname = strdup(path); + if (dname == NULL) + return -ENOMEM; + + dir = dirname(dname); + if (mkdir(dir, 0700) && errno != EEXIST) + err = -errno; + + free(dname); + if (err) { + cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); + pr_warn("failed to mkdir %s: %s\n", path, cp); + } + return err; +} + +static int check_path(const char *path) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + struct statfs st_fs; + char *dname, *dir; + int err = 0; + + if (path == NULL) + return -EINVAL; + + dname = strdup(path); + if (dname == NULL) + return -ENOMEM; + + dir = dirname(dname); + if (statfs(dir, &st_fs)) { + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); + pr_warn("failed to statfs %s: %s\n", dir, cp); + err = -errno; + } + free(dname); + + if (!err && st_fs.f_type != BPF_FS_MAGIC) { + pr_warn("specified path %s is not on BPF FS\n", path); + err = -EINVAL; + } + + return err; +} + +int bpf_program__pin_instance(struct bpf_program *prog, const char *path, + int instance) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + int err; + + err = make_parent_dir(path); + if (err) + return err; + + err = check_path(path); + if (err) + return err; + + if (prog == NULL) { + pr_warn("invalid program pointer\n"); + return -EINVAL; + } + + if (instance < 0 || instance >= prog->instances.nr) { + pr_warn("invalid prog instance %d of prog %s (max %d)\n", + instance, prog->section_name, prog->instances.nr); + return -EINVAL; + } + + if (bpf_obj_pin(prog->instances.fds[instance], path)) { + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); + pr_warn("failed to pin program: %s\n", cp); + return -errno; + } + pr_debug("pinned program '%s'\n", path); + + return 0; +} + +int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, + int instance) +{ + int err; + + err = check_path(path); + if (err) + return err; + + if (prog == NULL) { + pr_warn("invalid program pointer\n"); + return -EINVAL; + } + + if (instance < 0 || instance >= prog->instances.nr) { + pr_warn("invalid prog instance %d of prog %s (max %d)\n", + instance, prog->section_name, prog->instances.nr); + return -EINVAL; + } + + err = unlink(path); + if (err != 0) + return -errno; + pr_debug("unpinned program '%s'\n", path); + + return 0; +} + +int bpf_program__pin(struct bpf_program *prog, const char *path) +{ + int i, err; + + err = make_parent_dir(path); + if (err) + return err; + + err = check_path(path); + if (err) + return err; + + if (prog == NULL) { + pr_warn("invalid program pointer\n"); + return -EINVAL; + } + + if (prog->instances.nr <= 0) { + pr_warn("no instances of prog %s to pin\n", + prog->section_name); + return -EINVAL; + } + + if (prog->instances.nr == 1) { + /* don't create subdirs when pinning single instance */ + return bpf_program__pin_instance(prog, path, 0); + } + + for (i = 0; i < prog->instances.nr; i++) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%d", path, i); + if (len < 0) { + err = -EINVAL; + goto err_unpin; + } else if (len >= PATH_MAX) { + err = -ENAMETOOLONG; + goto err_unpin; + } + + err = bpf_program__pin_instance(prog, buf, i); + if (err) + goto err_unpin; + } + + return 0; + +err_unpin: + for (i = i - 1; i >= 0; i--) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%d", path, i); + if (len < 0) + continue; + else if (len >= PATH_MAX) + continue; + + bpf_program__unpin_instance(prog, buf, i); + } + + rmdir(path); + + return err; +} + +int bpf_program__unpin(struct bpf_program *prog, const char *path) +{ + int i, err; + + err = check_path(path); + if (err) + return err; + + if (prog == NULL) { + pr_warn("invalid program pointer\n"); + return -EINVAL; + } + + if (prog->instances.nr <= 0) { + pr_warn("no instances of prog %s to pin\n", + prog->section_name); + return -EINVAL; + } + + if (prog->instances.nr == 1) { + /* don't create subdirs when pinning single instance */ + return bpf_program__unpin_instance(prog, path, 0); + } + + for (i = 0; i < prog->instances.nr; i++) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%d", path, i); + if (len < 0) + return -EINVAL; + else if (len >= PATH_MAX) + return -ENAMETOOLONG; + + err = bpf_program__unpin_instance(prog, buf, i); + if (err) + return err; + } + + err = rmdir(path); + if (err) + return -errno; + + return 0; +} + +int bpf_map__pin(struct bpf_map *map, const char *path) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + int err; + + if (map == NULL) { + pr_warn("invalid map pointer\n"); + return -EINVAL; + } + + if (map->pin_path) { + if (path && strcmp(path, map->pin_path)) { + pr_warn("map '%s' already has pin path '%s' different from '%s'\n", + bpf_map__name(map), map->pin_path, path); + return -EINVAL; + } else if (map->pinned) { + pr_debug("map '%s' already pinned at '%s'; not re-pinning\n", + bpf_map__name(map), map->pin_path); + return 0; + } + } else { + if (!path) { + pr_warn("missing a path to pin map '%s' at\n", + bpf_map__name(map)); + return -EINVAL; + } else if (map->pinned) { + pr_warn("map '%s' already pinned\n", bpf_map__name(map)); + return -EEXIST; + } + + map->pin_path = strdup(path); + if (!map->pin_path) { + err = -errno; + goto out_err; + } + } + + err = make_parent_dir(map->pin_path); + if (err) + return err; + + err = check_path(map->pin_path); + if (err) + return err; + + if (bpf_obj_pin(map->fd, map->pin_path)) { + err = -errno; + goto out_err; + } + + map->pinned = true; + pr_debug("pinned map '%s'\n", map->pin_path); + + return 0; + +out_err: + cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); + pr_warn("failed to pin map: %s\n", cp); + return err; +} + +int bpf_map__unpin(struct bpf_map *map, const char *path) +{ + int err; + + if (map == NULL) { + pr_warn("invalid map pointer\n"); + return -EINVAL; + } + + if (map->pin_path) { + if (path && strcmp(path, map->pin_path)) { + pr_warn("map '%s' already has pin path '%s' different from '%s'\n", + bpf_map__name(map), map->pin_path, path); + return -EINVAL; + } + path = map->pin_path; + } else if (!path) { + pr_warn("no path to unpin map '%s' from\n", + bpf_map__name(map)); + return -EINVAL; + } + + err = check_path(path); + if (err) + return err; + + err = unlink(path); + if (err != 0) + return -errno; + + map->pinned = false; + pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path); + + return 0; +} + +int bpf_map__set_pin_path(struct bpf_map *map, const char *path) +{ + char *new = NULL; + + if (path) { + new = strdup(path); + if (!new) + return -errno; + } + + free(map->pin_path); + map->pin_path = new; + return 0; +} + +const char *bpf_map__get_pin_path(const struct bpf_map *map) +{ + return map->pin_path; +} + +bool bpf_map__is_pinned(const struct bpf_map *map) +{ + return map->pinned; +} + +int bpf_object__pin_maps(struct bpf_object *obj, const char *path) +{ + struct bpf_map *map; + int err; + + if (!obj) + return -ENOENT; + + if (!obj->loaded) { + pr_warn("object not yet loaded; load it first\n"); + return -ENOENT; + } + + bpf_object__for_each_map(map, obj) { + char *pin_path = NULL; + char buf[PATH_MAX]; + + if (path) { + int len; + + len = snprintf(buf, PATH_MAX, "%s/%s", path, + bpf_map__name(map)); + if (len < 0) { + err = -EINVAL; + goto err_unpin_maps; + } else if (len >= PATH_MAX) { + err = -ENAMETOOLONG; + goto err_unpin_maps; + } + pin_path = buf; + } else if (!map->pin_path) { + continue; + } + + err = bpf_map__pin(map, pin_path); + if (err) + goto err_unpin_maps; + } + + return 0; + +err_unpin_maps: + while ((map = bpf_map__prev(map, obj))) { + if (!map->pin_path) + continue; + + bpf_map__unpin(map, NULL); + } + + return err; +} + +int bpf_object__unpin_maps(struct bpf_object *obj, const char *path) +{ + struct bpf_map *map; + int err; + + if (!obj) + return -ENOENT; + + bpf_object__for_each_map(map, obj) { + char *pin_path = NULL; + char buf[PATH_MAX]; + + if (path) { + int len; + + len = snprintf(buf, PATH_MAX, "%s/%s", path, + bpf_map__name(map)); + if (len < 0) + return -EINVAL; + else if (len >= PATH_MAX) + return -ENAMETOOLONG; + pin_path = buf; + } else if (!map->pin_path) { + continue; + } + + err = bpf_map__unpin(map, pin_path); + if (err) + return err; + } + + return 0; +} + +int bpf_object__pin_programs(struct bpf_object *obj, const char *path) +{ + struct bpf_program *prog; + int err; + + if (!obj) + return -ENOENT; + + if (!obj->loaded) { + pr_warn("object not yet loaded; load it first\n"); + return -ENOENT; + } + + bpf_object__for_each_program(prog, obj) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%s", path, + prog->pin_name); + if (len < 0) { + err = -EINVAL; + goto err_unpin_programs; + } else if (len >= PATH_MAX) { + err = -ENAMETOOLONG; + goto err_unpin_programs; + } + + err = bpf_program__pin(prog, buf); + if (err) + goto err_unpin_programs; + } + + return 0; + +err_unpin_programs: + while ((prog = bpf_program__prev(prog, obj))) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%s", path, + prog->pin_name); + if (len < 0) + continue; + else if (len >= PATH_MAX) + continue; + + bpf_program__unpin(prog, buf); + } + + return err; +} + +int bpf_object__unpin_programs(struct bpf_object *obj, const char *path) +{ + struct bpf_program *prog; + int err; + + if (!obj) + return -ENOENT; + + bpf_object__for_each_program(prog, obj) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%s", path, + prog->pin_name); + if (len < 0) + return -EINVAL; + else if (len >= PATH_MAX) + return -ENAMETOOLONG; + + err = bpf_program__unpin(prog, buf); + if (err) + return err; + } + + return 0; +} + +int bpf_object__pin(struct bpf_object *obj, const char *path) +{ + int err; + + err = bpf_object__pin_maps(obj, path); + if (err) + return err; + + err = bpf_object__pin_programs(obj, path); + if (err) { + bpf_object__unpin_maps(obj, path); + return err; + } + + return 0; +} + +void bpf_object__close(struct bpf_object *obj) +{ + size_t i; + + if (!obj) + return; + + if (obj->clear_priv) + obj->clear_priv(obj, obj->priv); + + bpf_object__elf_finish(obj); + bpf_object__unload(obj); + btf__free(obj->btf); + btf_ext__free(obj->btf_ext); + + for (i = 0; i < obj->nr_maps; i++) { + zfree(&obj->maps[i].name); + zfree(&obj->maps[i].pin_path); + if (obj->maps[i].clear_priv) + obj->maps[i].clear_priv(&obj->maps[i], + obj->maps[i].priv); + obj->maps[i].priv = NULL; + obj->maps[i].clear_priv = NULL; + } + + zfree(&obj->sections.rodata); + zfree(&obj->sections.data); + zfree(&obj->maps); + obj->nr_maps = 0; + + if (obj->programs && obj->nr_programs) { + for (i = 0; i < obj->nr_programs; i++) + bpf_program__exit(&obj->programs[i]); + } + zfree(&obj->programs); + + list_del(&obj->list); + free(obj); +} + +struct bpf_object * +bpf_object__next(struct bpf_object *prev) +{ + struct bpf_object *next; + + if (!prev) + next = list_first_entry(&bpf_objects_list, + struct bpf_object, + list); + else + next = list_next_entry(prev, list); + + /* Empty list is noticed here so don't need checking on entry. */ + if (&next->list == &bpf_objects_list) + return NULL; + + return next; +} + +const char *bpf_object__name(const struct bpf_object *obj) +{ + return obj ? obj->name : ERR_PTR(-EINVAL); +} + +unsigned int bpf_object__kversion(const struct bpf_object *obj) +{ + return obj ? obj->kern_version : 0; +} + +struct btf *bpf_object__btf(const struct bpf_object *obj) +{ + return obj ? obj->btf : NULL; +} + +int bpf_object__btf_fd(const struct bpf_object *obj) +{ + return obj->btf ? btf__fd(obj->btf) : -1; +} + +int bpf_object__set_priv(struct bpf_object *obj, void *priv, + bpf_object_clear_priv_t clear_priv) +{ + if (obj->priv && obj->clear_priv) + obj->clear_priv(obj, obj->priv); + + obj->priv = priv; + obj->clear_priv = clear_priv; + return 0; +} + +void *bpf_object__priv(const struct bpf_object *obj) +{ + return obj ? obj->priv : ERR_PTR(-EINVAL); +} + +static struct bpf_program * +__bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj, + bool forward) +{ + size_t nr_programs = obj->nr_programs; + ssize_t idx; + + if (!nr_programs) + return NULL; + + if (!p) + /* Iter from the beginning */ + return forward ? &obj->programs[0] : + &obj->programs[nr_programs - 1]; + + if (p->obj != obj) { + pr_warn("error: program handler doesn't match object\n"); + return NULL; + } + + idx = (p - obj->programs) + (forward ? 1 : -1); + if (idx >= obj->nr_programs || idx < 0) + return NULL; + return &obj->programs[idx]; +} + +struct bpf_program * +bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj) +{ + struct bpf_program *prog = prev; + + do { + prog = __bpf_program__iter(prog, obj, true); + } while (prog && bpf_program__is_function_storage(prog, obj)); + + return prog; +} + +struct bpf_program * +bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj) +{ + struct bpf_program *prog = next; + + do { + prog = __bpf_program__iter(prog, obj, false); + } while (prog && bpf_program__is_function_storage(prog, obj)); + + return prog; +} + +int bpf_program__set_priv(struct bpf_program *prog, void *priv, + bpf_program_clear_priv_t clear_priv) +{ + if (prog->priv && prog->clear_priv) + prog->clear_priv(prog, prog->priv); + + prog->priv = priv; + prog->clear_priv = clear_priv; + return 0; +} + +void *bpf_program__priv(const struct bpf_program *prog) +{ + return prog ? prog->priv : ERR_PTR(-EINVAL); +} + +void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex) +{ + prog->prog_ifindex = ifindex; +} + +const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy) +{ + const char *title; + + title = prog->section_name; + if (needs_copy) { + title = strdup(title); + if (!title) { + pr_warn("failed to strdup program title\n"); + return ERR_PTR(-ENOMEM); + } + } + + return title; +} + +int bpf_program__fd(const struct bpf_program *prog) +{ + return bpf_program__nth_fd(prog, 0); +} + +size_t bpf_program__size(const struct bpf_program *prog) +{ + return prog->insns_cnt * sizeof(struct bpf_insn); +} + +int bpf_program__set_prep(struct bpf_program *prog, int nr_instances, + bpf_program_prep_t prep) +{ + int *instances_fds; + + if (nr_instances <= 0 || !prep) + return -EINVAL; + + if (prog->instances.nr > 0 || prog->instances.fds) { + pr_warn("Can't set pre-processor after loading\n"); + return -EINVAL; + } + + instances_fds = malloc(sizeof(int) * nr_instances); + if (!instances_fds) { + pr_warn("alloc memory failed for fds\n"); + return -ENOMEM; + } + + /* fill all fd with -1 */ + memset(instances_fds, -1, sizeof(int) * nr_instances); + + prog->instances.nr = nr_instances; + prog->instances.fds = instances_fds; + prog->preprocessor = prep; + return 0; +} + +int bpf_program__nth_fd(const struct bpf_program *prog, int n) +{ + int fd; + + if (!prog) + return -EINVAL; + + if (n >= prog->instances.nr || n < 0) { + pr_warn("Can't get the %dth fd from program %s: only %d instances\n", + n, prog->section_name, prog->instances.nr); + return -EINVAL; + } + + fd = prog->instances.fds[n]; + if (fd < 0) { + pr_warn("%dth instance of program '%s' is invalid\n", + n, prog->section_name); + return -ENOENT; + } + + return fd; +} + +enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog) +{ + return prog->type; +} + +void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type) +{ + prog->type = type; +} + +static bool bpf_program__is_type(const struct bpf_program *prog, + enum bpf_prog_type type) +{ + return prog ? (prog->type == type) : false; +} + +#define BPF_PROG_TYPE_FNS(NAME, TYPE) \ +int bpf_program__set_##NAME(struct bpf_program *prog) \ +{ \ + if (!prog) \ + return -EINVAL; \ + bpf_program__set_type(prog, TYPE); \ + return 0; \ +} \ + \ +bool bpf_program__is_##NAME(const struct bpf_program *prog) \ +{ \ + return bpf_program__is_type(prog, TYPE); \ +} \ + +BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER); +BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE); +BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS); +BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT); +BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT); +BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT); +BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP); +BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT); +BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING); + +enum bpf_attach_type +bpf_program__get_expected_attach_type(struct bpf_program *prog) +{ + return prog->expected_attach_type; +} + +void bpf_program__set_expected_attach_type(struct bpf_program *prog, + enum bpf_attach_type type) +{ + prog->expected_attach_type = type; +} + +#define BPF_PROG_SEC_IMPL(string, ptype, eatype, is_attachable, btf, atype) \ + { string, sizeof(string) - 1, ptype, eatype, is_attachable, btf, atype } + +/* Programs that can NOT be attached. */ +#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0, 0) + +/* Programs that can be attached. */ +#define BPF_APROG_SEC(string, ptype, atype) \ + BPF_PROG_SEC_IMPL(string, ptype, 0, 1, 0, atype) + +/* Programs that must specify expected attach type at load time. */ +#define BPF_EAPROG_SEC(string, ptype, eatype) \ + BPF_PROG_SEC_IMPL(string, ptype, eatype, 1, 0, eatype) + +/* Programs that use BTF to identify attach point */ +#define BPF_PROG_BTF(string, ptype, eatype) \ + BPF_PROG_SEC_IMPL(string, ptype, eatype, 0, 1, 0) + +/* Programs that can be attached but attach type can't be identified by section + * name. Kept for backward compatibility. + */ +#define BPF_APROG_COMPAT(string, ptype) BPF_PROG_SEC(string, ptype) + +static const struct { + const char *sec; + size_t len; + enum bpf_prog_type prog_type; + enum bpf_attach_type expected_attach_type; + bool is_attachable; + bool is_attach_btf; + enum bpf_attach_type attach_type; +} section_names[] = { + BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), + BPF_PROG_SEC("kprobe/", BPF_PROG_TYPE_KPROBE), + BPF_PROG_SEC("uprobe/", BPF_PROG_TYPE_KPROBE), + BPF_PROG_SEC("kretprobe/", BPF_PROG_TYPE_KPROBE), + BPF_PROG_SEC("uretprobe/", BPF_PROG_TYPE_KPROBE), + BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS), + BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT), + BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT), + BPF_PROG_SEC("tp/", BPF_PROG_TYPE_TRACEPOINT), + BPF_PROG_SEC("raw_tracepoint/", BPF_PROG_TYPE_RAW_TRACEPOINT), + BPF_PROG_SEC("raw_tp/", BPF_PROG_TYPE_RAW_TRACEPOINT), + BPF_PROG_BTF("tp_btf/", BPF_PROG_TYPE_TRACING, + BPF_TRACE_RAW_TP), + BPF_PROG_BTF("fentry/", BPF_PROG_TYPE_TRACING, + BPF_TRACE_FENTRY), + BPF_PROG_BTF("fexit/", BPF_PROG_TYPE_TRACING, + BPF_TRACE_FEXIT), + BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), + BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), + BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN), + BPF_PROG_SEC("lwt_out", BPF_PROG_TYPE_LWT_OUT), + BPF_PROG_SEC("lwt_xmit", BPF_PROG_TYPE_LWT_XMIT), + BPF_PROG_SEC("lwt_seg6local", BPF_PROG_TYPE_LWT_SEG6LOCAL), + BPF_APROG_SEC("cgroup_skb/ingress", BPF_PROG_TYPE_CGROUP_SKB, + BPF_CGROUP_INET_INGRESS), + BPF_APROG_SEC("cgroup_skb/egress", BPF_PROG_TYPE_CGROUP_SKB, + BPF_CGROUP_INET_EGRESS), + BPF_APROG_COMPAT("cgroup/skb", BPF_PROG_TYPE_CGROUP_SKB), + BPF_APROG_SEC("cgroup/sock", BPF_PROG_TYPE_CGROUP_SOCK, + BPF_CGROUP_INET_SOCK_CREATE), + BPF_EAPROG_SEC("cgroup/post_bind4", BPF_PROG_TYPE_CGROUP_SOCK, + BPF_CGROUP_INET4_POST_BIND), + BPF_EAPROG_SEC("cgroup/post_bind6", BPF_PROG_TYPE_CGROUP_SOCK, + BPF_CGROUP_INET6_POST_BIND), + BPF_APROG_SEC("cgroup/dev", BPF_PROG_TYPE_CGROUP_DEVICE, + BPF_CGROUP_DEVICE), + BPF_APROG_SEC("sockops", BPF_PROG_TYPE_SOCK_OPS, + BPF_CGROUP_SOCK_OPS), + BPF_APROG_SEC("sk_skb/stream_parser", BPF_PROG_TYPE_SK_SKB, + BPF_SK_SKB_STREAM_PARSER), + BPF_APROG_SEC("sk_skb/stream_verdict", BPF_PROG_TYPE_SK_SKB, + BPF_SK_SKB_STREAM_VERDICT), + BPF_APROG_COMPAT("sk_skb", BPF_PROG_TYPE_SK_SKB), + BPF_APROG_SEC("sk_msg", BPF_PROG_TYPE_SK_MSG, + BPF_SK_MSG_VERDICT), + BPF_APROG_SEC("lirc_mode2", BPF_PROG_TYPE_LIRC_MODE2, + BPF_LIRC_MODE2), + BPF_APROG_SEC("flow_dissector", BPF_PROG_TYPE_FLOW_DISSECTOR, + BPF_FLOW_DISSECTOR), + BPF_EAPROG_SEC("cgroup/bind4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_CGROUP_INET4_BIND), + BPF_EAPROG_SEC("cgroup/bind6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_CGROUP_INET6_BIND), + BPF_EAPROG_SEC("cgroup/connect4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_CGROUP_INET4_CONNECT), + BPF_EAPROG_SEC("cgroup/connect6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_CGROUP_INET6_CONNECT), + BPF_EAPROG_SEC("cgroup/sendmsg4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_CGROUP_UDP4_SENDMSG), + BPF_EAPROG_SEC("cgroup/sendmsg6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_CGROUP_UDP6_SENDMSG), + BPF_EAPROG_SEC("cgroup/recvmsg4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_CGROUP_UDP4_RECVMSG), + BPF_EAPROG_SEC("cgroup/recvmsg6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_CGROUP_UDP6_RECVMSG), + BPF_EAPROG_SEC("cgroup/sysctl", BPF_PROG_TYPE_CGROUP_SYSCTL, + BPF_CGROUP_SYSCTL), + BPF_EAPROG_SEC("cgroup/getsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT, + BPF_CGROUP_GETSOCKOPT), + BPF_EAPROG_SEC("cgroup/setsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT, + BPF_CGROUP_SETSOCKOPT), +}; + +#undef BPF_PROG_SEC_IMPL +#undef BPF_PROG_SEC +#undef BPF_APROG_SEC +#undef BPF_EAPROG_SEC +#undef BPF_APROG_COMPAT + +#define MAX_TYPE_NAME_SIZE 32 + +static char *libbpf_get_type_names(bool attach_type) +{ + int i, len = ARRAY_SIZE(section_names) * MAX_TYPE_NAME_SIZE; + char *buf; + + buf = malloc(len); + if (!buf) + return NULL; + + buf[0] = '\0'; + /* Forge string buf with all available names */ + for (i = 0; i < ARRAY_SIZE(section_names); i++) { + if (attach_type && !section_names[i].is_attachable) + continue; + + if (strlen(buf) + strlen(section_names[i].sec) + 2 > len) { + free(buf); + return NULL; + } + strcat(buf, " "); + strcat(buf, section_names[i].sec); + } + + return buf; +} + +int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, + enum bpf_attach_type *expected_attach_type) +{ + char *type_names; + int i; + + if (!name) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(section_names); i++) { + if (strncmp(name, section_names[i].sec, section_names[i].len)) + continue; + *prog_type = section_names[i].prog_type; + *expected_attach_type = section_names[i].expected_attach_type; + return 0; + } + pr_warn("failed to guess program type from ELF section '%s'\n", name); + type_names = libbpf_get_type_names(false); + if (type_names != NULL) { + pr_info("supported section(type) names are:%s\n", type_names); + free(type_names); + } + + return -ESRCH; +} + +#define BTF_PREFIX "btf_trace_" +int libbpf_find_vmlinux_btf_id(const char *name, + enum bpf_attach_type attach_type) +{ + struct btf *btf = bpf_core_find_kernel_btf(); + char raw_tp_btf[128] = BTF_PREFIX; + char *dst = raw_tp_btf + sizeof(BTF_PREFIX) - 1; + const char *btf_name; + int err = -EINVAL; + __u32 kind; + + if (IS_ERR(btf)) { + pr_warn("vmlinux BTF is not found\n"); + return -EINVAL; + } + + if (attach_type == BPF_TRACE_RAW_TP) { + /* prepend "btf_trace_" prefix per kernel convention */ + strncat(dst, name, sizeof(raw_tp_btf) - sizeof(BTF_PREFIX)); + btf_name = raw_tp_btf; + kind = BTF_KIND_TYPEDEF; + } else { + btf_name = name; + kind = BTF_KIND_FUNC; + } + err = btf__find_by_name_kind(btf, btf_name, kind); + btf__free(btf); + return err; +} + +static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) +{ + struct bpf_prog_info_linear *info_linear; + struct bpf_prog_info *info; + struct btf *btf = NULL; + int err = -EINVAL; + + info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0); + if (IS_ERR_OR_NULL(info_linear)) { + pr_warn("failed get_prog_info_linear for FD %d\n", + attach_prog_fd); + return -EINVAL; + } + info = &info_linear->info; + if (!info->btf_id) { + pr_warn("The target program doesn't have BTF\n"); + goto out; + } + if (btf__get_from_id(info->btf_id, &btf)) { + pr_warn("Failed to get BTF of the program\n"); + goto out; + } + err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); + btf__free(btf); + if (err <= 0) { + pr_warn("%s is not found in prog's BTF\n", name); + goto out; + } +out: + free(info_linear); + return err; +} + +static int libbpf_find_attach_btf_id(const char *name, + enum bpf_attach_type attach_type, + __u32 attach_prog_fd) +{ + int i, err; + + if (!name) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(section_names); i++) { + if (!section_names[i].is_attach_btf) + continue; + if (strncmp(name, section_names[i].sec, section_names[i].len)) + continue; + if (attach_prog_fd) + err = libbpf_find_prog_btf_id(name + section_names[i].len, + attach_prog_fd); + else + err = libbpf_find_vmlinux_btf_id(name + section_names[i].len, + attach_type); + if (err <= 0) + pr_warn("%s is not found in vmlinux BTF\n", name); + return err; + } + pr_warn("failed to identify btf_id based on ELF section name '%s'\n", name); + return -ESRCH; +} + +int libbpf_attach_type_by_name(const char *name, + enum bpf_attach_type *attach_type) +{ + char *type_names; + int i; + + if (!name) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(section_names); i++) { + if (strncmp(name, section_names[i].sec, section_names[i].len)) + continue; + if (!section_names[i].is_attachable) + return -EINVAL; + *attach_type = section_names[i].attach_type; + return 0; + } + pr_warn("failed to guess attach type based on ELF section name '%s'\n", name); + type_names = libbpf_get_type_names(true); + if (type_names != NULL) { + pr_info("attachable section(type) names are:%s\n", type_names); + free(type_names); + } + + return -EINVAL; +} + +int bpf_map__fd(const struct bpf_map *map) +{ + return map ? map->fd : -EINVAL; +} + +const struct bpf_map_def *bpf_map__def(const struct bpf_map *map) +{ + return map ? &map->def : ERR_PTR(-EINVAL); +} + +const char *bpf_map__name(const struct bpf_map *map) +{ + return map ? map->name : NULL; +} + +__u32 bpf_map__btf_key_type_id(const struct bpf_map *map) +{ + return map ? map->btf_key_type_id : 0; +} + +__u32 bpf_map__btf_value_type_id(const struct bpf_map *map) +{ + return map ? map->btf_value_type_id : 0; +} + +int bpf_map__set_priv(struct bpf_map *map, void *priv, + bpf_map_clear_priv_t clear_priv) +{ + if (!map) + return -EINVAL; + + if (map->priv) { + if (map->clear_priv) + map->clear_priv(map, map->priv); + } + + map->priv = priv; + map->clear_priv = clear_priv; + return 0; +} + +void *bpf_map__priv(const struct bpf_map *map) +{ + return map ? map->priv : ERR_PTR(-EINVAL); +} + +bool bpf_map__is_offload_neutral(const struct bpf_map *map) +{ + return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY; +} + +bool bpf_map__is_internal(const struct bpf_map *map) +{ + return map->libbpf_type != LIBBPF_MAP_UNSPEC; +} + +void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex) +{ + map->map_ifindex = ifindex; +} + +int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd) +{ + if (!bpf_map_type__is_map_in_map(map->def.type)) { + pr_warn("error: unsupported map type\n"); + return -EINVAL; + } + if (map->inner_map_fd != -1) { + pr_warn("error: inner_map_fd already specified\n"); + return -EINVAL; + } + map->inner_map_fd = fd; + return 0; +} + +static struct bpf_map * +__bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i) +{ + ssize_t idx; + struct bpf_map *s, *e; + + if (!obj || !obj->maps) + return NULL; + + s = obj->maps; + e = obj->maps + obj->nr_maps; + + if ((m < s) || (m >= e)) { + pr_warn("error in %s: map handler doesn't belong to object\n", + __func__); + return NULL; + } + + idx = (m - obj->maps) + i; + if (idx >= obj->nr_maps || idx < 0) + return NULL; + return &obj->maps[idx]; +} + +struct bpf_map * +bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj) +{ + if (prev == NULL) + return obj->maps; + + return __bpf_map__iter(prev, obj, 1); +} + +struct bpf_map * +bpf_map__prev(const struct bpf_map *next, const struct bpf_object *obj) +{ + if (next == NULL) { + if (!obj->nr_maps) + return NULL; + return obj->maps + obj->nr_maps - 1; + } + + return __bpf_map__iter(next, obj, -1); +} + +struct bpf_map * +bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name) +{ + struct bpf_map *pos; + + bpf_object__for_each_map(pos, obj) { + if (pos->name && !strcmp(pos->name, name)) + return pos; + } + return NULL; +} + +int +bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name) +{ + return bpf_map__fd(bpf_object__find_map_by_name(obj, name)); +} + +struct bpf_map * +bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset) +{ + return ERR_PTR(-ENOTSUP); +} + +long libbpf_get_error(const void *ptr) +{ + return PTR_ERR_OR_ZERO(ptr); +} + +int bpf_prog_load(const char *file, enum bpf_prog_type type, + struct bpf_object **pobj, int *prog_fd) +{ + struct bpf_prog_load_attr attr; + + memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); + attr.file = file; + attr.prog_type = type; + attr.expected_attach_type = 0; + + return bpf_prog_load_xattr(&attr, pobj, prog_fd); +} + +int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, + struct bpf_object **pobj, int *prog_fd) +{ + struct bpf_object_open_attr open_attr = {}; + struct bpf_program *prog, *first_prog = NULL; + struct bpf_object *obj; + struct bpf_map *map; + int err; + + if (!attr) + return -EINVAL; + if (!attr->file) + return -EINVAL; + + open_attr.file = attr->file; + open_attr.prog_type = attr->prog_type; + + obj = bpf_object__open_xattr(&open_attr); + if (IS_ERR_OR_NULL(obj)) + return -ENOENT; + + bpf_object__for_each_program(prog, obj) { + enum bpf_attach_type attach_type = attr->expected_attach_type; + /* + * to preserve backwards compatibility, bpf_prog_load treats + * attr->prog_type, if specified, as an override to whatever + * bpf_object__open guessed + */ + if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) { + bpf_program__set_type(prog, attr->prog_type); + bpf_program__set_expected_attach_type(prog, + attach_type); + } + if (bpf_program__get_type(prog) == BPF_PROG_TYPE_UNSPEC) { + /* + * we haven't guessed from section name and user + * didn't provide a fallback type, too bad... + */ + bpf_object__close(obj); + return -EINVAL; + } + + prog->prog_ifindex = attr->ifindex; + prog->log_level = attr->log_level; + prog->prog_flags = attr->prog_flags; + if (!first_prog) + first_prog = prog; + } + + bpf_object__for_each_map(map, obj) { + if (!bpf_map__is_offload_neutral(map)) + map->map_ifindex = attr->ifindex; + } + + if (!first_prog) { + pr_warn("object file doesn't contain bpf program\n"); + bpf_object__close(obj); + return -ENOENT; + } + + err = bpf_object__load(obj); + if (err) { + bpf_object__close(obj); + return -EINVAL; + } + + *pobj = obj; + *prog_fd = bpf_program__fd(first_prog); + return 0; +} + +struct bpf_link { + int (*destroy)(struct bpf_link *link); +}; + +int bpf_link__destroy(struct bpf_link *link) +{ + int err; + + if (!link) + return 0; + + err = link->destroy(link); + free(link); + + return err; +} + +struct bpf_link_fd { + struct bpf_link link; /* has to be at the top of struct */ + int fd; /* hook FD */ +}; + +static int bpf_link__destroy_perf_event(struct bpf_link *link) +{ + struct bpf_link_fd *l = (void *)link; + int err; + + err = ioctl(l->fd, PERF_EVENT_IOC_DISABLE, 0); + if (err) + err = -errno; + + close(l->fd); + return err; +} + +struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, + int pfd) +{ + char errmsg[STRERR_BUFSIZE]; + struct bpf_link_fd *link; + int prog_fd, err; + + if (pfd < 0) { + pr_warn("program '%s': invalid perf event FD %d\n", + bpf_program__title(prog, false), pfd); + return ERR_PTR(-EINVAL); + } + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) { + pr_warn("program '%s': can't attach BPF program w/o FD (did you load it?)\n", + bpf_program__title(prog, false)); + return ERR_PTR(-EINVAL); + } + + link = malloc(sizeof(*link)); + if (!link) + return ERR_PTR(-ENOMEM); + link->link.destroy = &bpf_link__destroy_perf_event; + link->fd = pfd; + + if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) { + err = -errno; + free(link); + pr_warn("program '%s': failed to attach to pfd %d: %s\n", + bpf_program__title(prog, false), pfd, + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + return ERR_PTR(err); + } + if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) { + err = -errno; + free(link); + pr_warn("program '%s': failed to enable pfd %d: %s\n", + bpf_program__title(prog, false), pfd, + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + return ERR_PTR(err); + } + return (struct bpf_link *)link; +} + +/* + * this function is expected to parse integer in the range of [0, 2^31-1] from + * given file using scanf format string fmt. If actual parsed value is + * negative, the result might be indistinguishable from error + */ +static int parse_uint_from_file(const char *file, const char *fmt) +{ + char buf[STRERR_BUFSIZE]; + int err, ret; + FILE *f; + + f = fopen(file, "r"); + if (!f) { + err = -errno; + pr_debug("failed to open '%s': %s\n", file, + libbpf_strerror_r(err, buf, sizeof(buf))); + return err; + } + err = fscanf(f, fmt, &ret); + if (err != 1) { + err = err == EOF ? -EIO : -errno; + pr_debug("failed to parse '%s': %s\n", file, + libbpf_strerror_r(err, buf, sizeof(buf))); + fclose(f); + return err; + } + fclose(f); + return ret; +} + +static int determine_kprobe_perf_type(void) +{ + const char *file = "/sys/bus/event_source/devices/kprobe/type"; + + return parse_uint_from_file(file, "%d\n"); +} + +static int determine_uprobe_perf_type(void) +{ + const char *file = "/sys/bus/event_source/devices/uprobe/type"; + + return parse_uint_from_file(file, "%d\n"); +} + +static int determine_kprobe_retprobe_bit(void) +{ + const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe"; + + return parse_uint_from_file(file, "config:%d\n"); +} + +static int determine_uprobe_retprobe_bit(void) +{ + const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe"; + + return parse_uint_from_file(file, "config:%d\n"); +} + +static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, + uint64_t offset, int pid) +{ + struct perf_event_attr attr = {}; + char errmsg[STRERR_BUFSIZE]; + int type, pfd, err; + + type = uprobe ? determine_uprobe_perf_type() + : determine_kprobe_perf_type(); + if (type < 0) { + pr_warn("failed to determine %s perf type: %s\n", + uprobe ? "uprobe" : "kprobe", + libbpf_strerror_r(type, errmsg, sizeof(errmsg))); + return type; + } + if (retprobe) { + int bit = uprobe ? determine_uprobe_retprobe_bit() + : determine_kprobe_retprobe_bit(); + + if (bit < 0) { + pr_warn("failed to determine %s retprobe bit: %s\n", + uprobe ? "uprobe" : "kprobe", + libbpf_strerror_r(bit, errmsg, sizeof(errmsg))); + return bit; + } + attr.config |= 1 << bit; + } + attr.size = sizeof(attr); + attr.type = type; + attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */ + attr.config2 = offset; /* kprobe_addr or probe_offset */ + + /* pid filter is meaningful only for uprobes */ + pfd = syscall(__NR_perf_event_open, &attr, + pid < 0 ? -1 : pid /* pid */, + pid == -1 ? 0 : -1 /* cpu */, + -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); + if (pfd < 0) { + err = -errno; + pr_warn("%s perf_event_open() failed: %s\n", + uprobe ? "uprobe" : "kprobe", + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + return err; + } + return pfd; +} + +struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog, + bool retprobe, + const char *func_name) +{ + char errmsg[STRERR_BUFSIZE]; + struct bpf_link *link; + int pfd, err; + + pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name, + 0 /* offset */, -1 /* pid */); + if (pfd < 0) { + pr_warn("program '%s': failed to create %s '%s' perf event: %s\n", + bpf_program__title(prog, false), + retprobe ? "kretprobe" : "kprobe", func_name, + libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + return ERR_PTR(pfd); + } + link = bpf_program__attach_perf_event(prog, pfd); + if (IS_ERR(link)) { + close(pfd); + err = PTR_ERR(link); + pr_warn("program '%s': failed to attach to %s '%s': %s\n", + bpf_program__title(prog, false), + retprobe ? "kretprobe" : "kprobe", func_name, + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + return link; + } + return link; +} + +struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog, + bool retprobe, pid_t pid, + const char *binary_path, + size_t func_offset) +{ + char errmsg[STRERR_BUFSIZE]; + struct bpf_link *link; + int pfd, err; + + pfd = perf_event_open_probe(true /* uprobe */, retprobe, + binary_path, func_offset, pid); + if (pfd < 0) { + pr_warn("program '%s': failed to create %s '%s:0x%zx' perf event: %s\n", + bpf_program__title(prog, false), + retprobe ? "uretprobe" : "uprobe", + binary_path, func_offset, + libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + return ERR_PTR(pfd); + } + link = bpf_program__attach_perf_event(prog, pfd); + if (IS_ERR(link)) { + close(pfd); + err = PTR_ERR(link); + pr_warn("program '%s': failed to attach to %s '%s:0x%zx': %s\n", + bpf_program__title(prog, false), + retprobe ? "uretprobe" : "uprobe", + binary_path, func_offset, + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + return link; + } + return link; +} + +static int determine_tracepoint_id(const char *tp_category, + const char *tp_name) +{ + char file[PATH_MAX]; + int ret; + + ret = snprintf(file, sizeof(file), + "/sys/kernel/debug/tracing/events/%s/%s/id", + tp_category, tp_name); + if (ret < 0) + return -errno; + if (ret >= sizeof(file)) { + pr_debug("tracepoint %s/%s path is too long\n", + tp_category, tp_name); + return -E2BIG; + } + return parse_uint_from_file(file, "%d\n"); +} + +static int perf_event_open_tracepoint(const char *tp_category, + const char *tp_name) +{ + struct perf_event_attr attr = {}; + char errmsg[STRERR_BUFSIZE]; + int tp_id, pfd, err; + + tp_id = determine_tracepoint_id(tp_category, tp_name); + if (tp_id < 0) { + pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n", + tp_category, tp_name, + libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg))); + return tp_id; + } + + attr.type = PERF_TYPE_TRACEPOINT; + attr.size = sizeof(attr); + attr.config = tp_id; + + pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */, + -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); + if (pfd < 0) { + err = -errno; + pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n", + tp_category, tp_name, + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + return err; + } + return pfd; +} + +struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog, + const char *tp_category, + const char *tp_name) +{ + char errmsg[STRERR_BUFSIZE]; + struct bpf_link *link; + int pfd, err; + + pfd = perf_event_open_tracepoint(tp_category, tp_name); + if (pfd < 0) { + pr_warn("program '%s': failed to create tracepoint '%s/%s' perf event: %s\n", + bpf_program__title(prog, false), + tp_category, tp_name, + libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + return ERR_PTR(pfd); + } + link = bpf_program__attach_perf_event(prog, pfd); + if (IS_ERR(link)) { + close(pfd); + err = PTR_ERR(link); + pr_warn("program '%s': failed to attach to tracepoint '%s/%s': %s\n", + bpf_program__title(prog, false), + tp_category, tp_name, + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + return link; + } + return link; +} + +static int bpf_link__destroy_fd(struct bpf_link *link) +{ + struct bpf_link_fd *l = (void *)link; + + return close(l->fd); +} + +struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, + const char *tp_name) +{ + char errmsg[STRERR_BUFSIZE]; + struct bpf_link_fd *link; + int prog_fd, pfd; + + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) { + pr_warn("program '%s': can't attach before loaded\n", + bpf_program__title(prog, false)); + return ERR_PTR(-EINVAL); + } + + link = malloc(sizeof(*link)); + if (!link) + return ERR_PTR(-ENOMEM); + link->link.destroy = &bpf_link__destroy_fd; + + pfd = bpf_raw_tracepoint_open(tp_name, prog_fd); + if (pfd < 0) { + pfd = -errno; + free(link); + pr_warn("program '%s': failed to attach to raw tracepoint '%s': %s\n", + bpf_program__title(prog, false), tp_name, + libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + return ERR_PTR(pfd); + } + link->fd = pfd; + return (struct bpf_link *)link; +} + +struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog) +{ + char errmsg[STRERR_BUFSIZE]; + struct bpf_link_fd *link; + int prog_fd, pfd; + + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) { + pr_warn("program '%s': can't attach before loaded\n", + bpf_program__title(prog, false)); + return ERR_PTR(-EINVAL); + } + + link = malloc(sizeof(*link)); + if (!link) + return ERR_PTR(-ENOMEM); + link->link.destroy = &bpf_link__destroy_fd; + + pfd = bpf_raw_tracepoint_open(NULL, prog_fd); + if (pfd < 0) { + pfd = -errno; + free(link); + pr_warn("program '%s': failed to attach to trace: %s\n", + bpf_program__title(prog, false), + libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + return ERR_PTR(pfd); + } + link->fd = pfd; + return (struct bpf_link *)link; +} + +enum bpf_perf_event_ret +bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, + void **copy_mem, size_t *copy_size, + bpf_perf_event_print_t fn, void *private_data) +{ + struct perf_event_mmap_page *header = mmap_mem; + __u64 data_head = ring_buffer_read_head(header); + __u64 data_tail = header->data_tail; + void *base = ((__u8 *)header) + page_size; + int ret = LIBBPF_PERF_EVENT_CONT; + struct perf_event_header *ehdr; + size_t ehdr_size; + + while (data_head != data_tail) { + ehdr = base + (data_tail & (mmap_size - 1)); + ehdr_size = ehdr->size; + + if (((void *)ehdr) + ehdr_size > base + mmap_size) { + void *copy_start = ehdr; + size_t len_first = base + mmap_size - copy_start; + size_t len_secnd = ehdr_size - len_first; + + if (*copy_size < ehdr_size) { + free(*copy_mem); + *copy_mem = malloc(ehdr_size); + if (!*copy_mem) { + *copy_size = 0; + ret = LIBBPF_PERF_EVENT_ERROR; + break; + } + *copy_size = ehdr_size; + } + + memcpy(*copy_mem, copy_start, len_first); + memcpy(*copy_mem + len_first, base, len_secnd); + ehdr = *copy_mem; + } + + ret = fn(ehdr, private_data); + data_tail += ehdr_size; + if (ret != LIBBPF_PERF_EVENT_CONT) + break; + } + + ring_buffer_write_tail(header, data_tail); + return ret; +} + +struct perf_buffer; + +struct perf_buffer_params { + struct perf_event_attr *attr; + /* if event_cb is specified, it takes precendence */ + perf_buffer_event_fn event_cb; + /* sample_cb and lost_cb are higher-level common-case callbacks */ + perf_buffer_sample_fn sample_cb; + perf_buffer_lost_fn lost_cb; + void *ctx; + int cpu_cnt; + int *cpus; + int *map_keys; +}; + +struct perf_cpu_buf { + struct perf_buffer *pb; + void *base; /* mmap()'ed memory */ + void *buf; /* for reconstructing segmented data */ + size_t buf_size; + int fd; + int cpu; + int map_key; +}; + +struct perf_buffer { + perf_buffer_event_fn event_cb; + perf_buffer_sample_fn sample_cb; + perf_buffer_lost_fn lost_cb; + void *ctx; /* passed into callbacks */ + + size_t page_size; + size_t mmap_size; + struct perf_cpu_buf **cpu_bufs; + struct epoll_event *events; + int cpu_cnt; + int epoll_fd; /* perf event FD */ + int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */ +}; + +static void perf_buffer__free_cpu_buf(struct perf_buffer *pb, + struct perf_cpu_buf *cpu_buf) +{ + if (!cpu_buf) + return; + if (cpu_buf->base && + munmap(cpu_buf->base, pb->mmap_size + pb->page_size)) + pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu); + if (cpu_buf->fd >= 0) { + ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0); + close(cpu_buf->fd); + } + free(cpu_buf->buf); + free(cpu_buf); +} + +void perf_buffer__free(struct perf_buffer *pb) +{ + int i; + + if (!pb) + return; + if (pb->cpu_bufs) { + for (i = 0; i < pb->cpu_cnt && pb->cpu_bufs[i]; i++) { + struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i]; + + bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key); + perf_buffer__free_cpu_buf(pb, cpu_buf); + } + free(pb->cpu_bufs); + } + if (pb->epoll_fd >= 0) + close(pb->epoll_fd); + free(pb->events); + free(pb); +} + +static struct perf_cpu_buf * +perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr, + int cpu, int map_key) +{ + struct perf_cpu_buf *cpu_buf; + char msg[STRERR_BUFSIZE]; + int err; + + cpu_buf = calloc(1, sizeof(*cpu_buf)); + if (!cpu_buf) + return ERR_PTR(-ENOMEM); + + cpu_buf->pb = pb; + cpu_buf->cpu = cpu; + cpu_buf->map_key = map_key; + + cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu, + -1, PERF_FLAG_FD_CLOEXEC); + if (cpu_buf->fd < 0) { + err = -errno; + pr_warn("failed to open perf buffer event on cpu #%d: %s\n", + cpu, libbpf_strerror_r(err, msg, sizeof(msg))); + goto error; + } + + cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size, + PROT_READ | PROT_WRITE, MAP_SHARED, + cpu_buf->fd, 0); + if (cpu_buf->base == MAP_FAILED) { + cpu_buf->base = NULL; + err = -errno; + pr_warn("failed to mmap perf buffer on cpu #%d: %s\n", + cpu, libbpf_strerror_r(err, msg, sizeof(msg))); + goto error; + } + + if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) { + err = -errno; + pr_warn("failed to enable perf buffer event on cpu #%d: %s\n", + cpu, libbpf_strerror_r(err, msg, sizeof(msg))); + goto error; + } + + return cpu_buf; + +error: + perf_buffer__free_cpu_buf(pb, cpu_buf); + return (struct perf_cpu_buf *)ERR_PTR(err); +} + +static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, + struct perf_buffer_params *p); + +struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt, + const struct perf_buffer_opts *opts) +{ + struct perf_buffer_params p = {}; + struct perf_event_attr attr = { 0, }; + + attr.config = PERF_COUNT_SW_BPF_OUTPUT, + attr.type = PERF_TYPE_SOFTWARE; + attr.sample_type = PERF_SAMPLE_RAW; + attr.sample_period = 1; + attr.wakeup_events = 1; + + p.attr = &attr; + p.sample_cb = opts ? opts->sample_cb : NULL; + p.lost_cb = opts ? opts->lost_cb : NULL; + p.ctx = opts ? opts->ctx : NULL; + + return __perf_buffer__new(map_fd, page_cnt, &p); +} + +struct perf_buffer * +perf_buffer__new_raw(int map_fd, size_t page_cnt, + const struct perf_buffer_raw_opts *opts) +{ + struct perf_buffer_params p = {}; + + p.attr = opts->attr; + p.event_cb = opts->event_cb; + p.ctx = opts->ctx; + p.cpu_cnt = opts->cpu_cnt; + p.cpus = opts->cpus; + p.map_keys = opts->map_keys; + + return __perf_buffer__new(map_fd, page_cnt, &p); +} + +static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, + struct perf_buffer_params *p) +{ + struct bpf_map_info map = {}; + char msg[STRERR_BUFSIZE]; + struct perf_buffer *pb; + __u32 map_info_len; + int err, i; + + if (page_cnt & (page_cnt - 1)) { + pr_warn("page count should be power of two, but is %zu\n", + page_cnt); + return ERR_PTR(-EINVAL); + } + + map_info_len = sizeof(map); + err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len); + if (err) { + err = -errno; + pr_warn("failed to get map info for map FD %d: %s\n", + map_fd, libbpf_strerror_r(err, msg, sizeof(msg))); + return ERR_PTR(err); + } + + if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) { + pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n", + map.name); + return ERR_PTR(-EINVAL); + } + + pb = calloc(1, sizeof(*pb)); + if (!pb) + return ERR_PTR(-ENOMEM); + + pb->event_cb = p->event_cb; + pb->sample_cb = p->sample_cb; + pb->lost_cb = p->lost_cb; + pb->ctx = p->ctx; + + pb->page_size = getpagesize(); + pb->mmap_size = pb->page_size * page_cnt; + pb->map_fd = map_fd; + + pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC); + if (pb->epoll_fd < 0) { + err = -errno; + pr_warn("failed to create epoll instance: %s\n", + libbpf_strerror_r(err, msg, sizeof(msg))); + goto error; + } + + if (p->cpu_cnt > 0) { + pb->cpu_cnt = p->cpu_cnt; + } else { + pb->cpu_cnt = libbpf_num_possible_cpus(); + if (pb->cpu_cnt < 0) { + err = pb->cpu_cnt; + goto error; + } + if (map.max_entries < pb->cpu_cnt) + pb->cpu_cnt = map.max_entries; + } + + pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events)); + if (!pb->events) { + err = -ENOMEM; + pr_warn("failed to allocate events: out of memory\n"); + goto error; + } + pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs)); + if (!pb->cpu_bufs) { + err = -ENOMEM; + pr_warn("failed to allocate buffers: out of memory\n"); + goto error; + } + + for (i = 0; i < pb->cpu_cnt; i++) { + struct perf_cpu_buf *cpu_buf; + int cpu, map_key; + + cpu = p->cpu_cnt > 0 ? p->cpus[i] : i; + map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i; + + cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key); + if (IS_ERR(cpu_buf)) { + err = PTR_ERR(cpu_buf); + goto error; + } + + pb->cpu_bufs[i] = cpu_buf; + + err = bpf_map_update_elem(pb->map_fd, &map_key, + &cpu_buf->fd, 0); + if (err) { + err = -errno; + pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n", + cpu, map_key, cpu_buf->fd, + libbpf_strerror_r(err, msg, sizeof(msg))); + goto error; + } + + pb->events[i].events = EPOLLIN; + pb->events[i].data.ptr = cpu_buf; + if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd, + &pb->events[i]) < 0) { + err = -errno; + pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n", + cpu, cpu_buf->fd, + libbpf_strerror_r(err, msg, sizeof(msg))); + goto error; + } + } + + return pb; + +error: + if (pb) + perf_buffer__free(pb); + return ERR_PTR(err); +} + +struct perf_sample_raw { + struct perf_event_header header; + uint32_t size; + char data[0]; +}; + +struct perf_sample_lost { + struct perf_event_header header; + uint64_t id; + uint64_t lost; + uint64_t sample_id; +}; + +static enum bpf_perf_event_ret +perf_buffer__process_record(struct perf_event_header *e, void *ctx) +{ + struct perf_cpu_buf *cpu_buf = ctx; + struct perf_buffer *pb = cpu_buf->pb; + void *data = e; + + /* user wants full control over parsing perf event */ + if (pb->event_cb) + return pb->event_cb(pb->ctx, cpu_buf->cpu, e); + + switch (e->type) { + case PERF_RECORD_SAMPLE: { + struct perf_sample_raw *s = data; + + if (pb->sample_cb) + pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size); + break; + } + case PERF_RECORD_LOST: { + struct perf_sample_lost *s = data; + + if (pb->lost_cb) + pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost); + break; + } + default: + pr_warn("unknown perf sample type %d\n", e->type); + return LIBBPF_PERF_EVENT_ERROR; + } + return LIBBPF_PERF_EVENT_CONT; +} + +static int perf_buffer__process_records(struct perf_buffer *pb, + struct perf_cpu_buf *cpu_buf) +{ + enum bpf_perf_event_ret ret; + + ret = bpf_perf_event_read_simple(cpu_buf->base, pb->mmap_size, + pb->page_size, &cpu_buf->buf, + &cpu_buf->buf_size, + perf_buffer__process_record, cpu_buf); + if (ret != LIBBPF_PERF_EVENT_CONT) + return ret; + return 0; +} + +int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms) +{ + int i, cnt, err; + + cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms); + for (i = 0; i < cnt; i++) { + struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr; + + err = perf_buffer__process_records(pb, cpu_buf); + if (err) { + pr_warn("error while processing records: %d\n", err); + return err; + } + } + return cnt < 0 ? -errno : cnt; +} + +struct bpf_prog_info_array_desc { + int array_offset; /* e.g. offset of jited_prog_insns */ + int count_offset; /* e.g. offset of jited_prog_len */ + int size_offset; /* > 0: offset of rec size, + * < 0: fix size of -size_offset + */ +}; + +static struct bpf_prog_info_array_desc bpf_prog_info_array_desc[] = { + [BPF_PROG_INFO_JITED_INSNS] = { + offsetof(struct bpf_prog_info, jited_prog_insns), + offsetof(struct bpf_prog_info, jited_prog_len), + -1, + }, + [BPF_PROG_INFO_XLATED_INSNS] = { + offsetof(struct bpf_prog_info, xlated_prog_insns), + offsetof(struct bpf_prog_info, xlated_prog_len), + -1, + }, + [BPF_PROG_INFO_MAP_IDS] = { + offsetof(struct bpf_prog_info, map_ids), + offsetof(struct bpf_prog_info, nr_map_ids), + -(int)sizeof(__u32), + }, + [BPF_PROG_INFO_JITED_KSYMS] = { + offsetof(struct bpf_prog_info, jited_ksyms), + offsetof(struct bpf_prog_info, nr_jited_ksyms), + -(int)sizeof(__u64), + }, + [BPF_PROG_INFO_JITED_FUNC_LENS] = { + offsetof(struct bpf_prog_info, jited_func_lens), + offsetof(struct bpf_prog_info, nr_jited_func_lens), + -(int)sizeof(__u32), + }, + [BPF_PROG_INFO_FUNC_INFO] = { + offsetof(struct bpf_prog_info, func_info), + offsetof(struct bpf_prog_info, nr_func_info), + offsetof(struct bpf_prog_info, func_info_rec_size), + }, + [BPF_PROG_INFO_LINE_INFO] = { + offsetof(struct bpf_prog_info, line_info), + offsetof(struct bpf_prog_info, nr_line_info), + offsetof(struct bpf_prog_info, line_info_rec_size), + }, + [BPF_PROG_INFO_JITED_LINE_INFO] = { + offsetof(struct bpf_prog_info, jited_line_info), + offsetof(struct bpf_prog_info, nr_jited_line_info), + offsetof(struct bpf_prog_info, jited_line_info_rec_size), + }, + [BPF_PROG_INFO_PROG_TAGS] = { + offsetof(struct bpf_prog_info, prog_tags), + offsetof(struct bpf_prog_info, nr_prog_tags), + -(int)sizeof(__u8) * BPF_TAG_SIZE, + }, + +}; + +static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info, + int offset) +{ + __u32 *array = (__u32 *)info; + + if (offset >= 0) + return array[offset / sizeof(__u32)]; + return -(int)offset; +} + +static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info, + int offset) +{ + __u64 *array = (__u64 *)info; + + if (offset >= 0) + return array[offset / sizeof(__u64)]; + return -(int)offset; +} + +static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset, + __u32 val) +{ + __u32 *array = (__u32 *)info; + + if (offset >= 0) + array[offset / sizeof(__u32)] = val; +} + +static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset, + __u64 val) +{ + __u64 *array = (__u64 *)info; + + if (offset >= 0) + array[offset / sizeof(__u64)] = val; +} + +struct bpf_prog_info_linear * +bpf_program__get_prog_info_linear(int fd, __u64 arrays) +{ + struct bpf_prog_info_linear *info_linear; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + __u32 data_len = 0; + int i, err; + void *ptr; + + if (arrays >> BPF_PROG_INFO_LAST_ARRAY) + return ERR_PTR(-EINVAL); + + /* step 1: get array dimensions */ + err = bpf_obj_get_info_by_fd(fd, &info, &info_len); + if (err) { + pr_debug("can't get prog info: %s", strerror(errno)); + return ERR_PTR(-EFAULT); + } + + /* step 2: calculate total size of all arrays */ + for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { + bool include_array = (arrays & (1UL << i)) > 0; + struct bpf_prog_info_array_desc *desc; + __u32 count, size; + + desc = bpf_prog_info_array_desc + i; + + /* kernel is too old to support this field */ + if (info_len < desc->array_offset + sizeof(__u32) || + info_len < desc->count_offset + sizeof(__u32) || + (desc->size_offset > 0 && info_len < desc->size_offset)) + include_array = false; + + if (!include_array) { + arrays &= ~(1UL << i); /* clear the bit */ + continue; + } + + count = bpf_prog_info_read_offset_u32(&info, desc->count_offset); + size = bpf_prog_info_read_offset_u32(&info, desc->size_offset); + + data_len += count * size; + } + + /* step 3: allocate continuous memory */ + data_len = roundup(data_len, sizeof(__u64)); + info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len); + if (!info_linear) + return ERR_PTR(-ENOMEM); + + /* step 4: fill data to info_linear->info */ + info_linear->arrays = arrays; + memset(&info_linear->info, 0, sizeof(info)); + ptr = info_linear->data; + + for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { + struct bpf_prog_info_array_desc *desc; + __u32 count, size; + + if ((arrays & (1UL << i)) == 0) + continue; + + desc = bpf_prog_info_array_desc + i; + count = bpf_prog_info_read_offset_u32(&info, desc->count_offset); + size = bpf_prog_info_read_offset_u32(&info, desc->size_offset); + bpf_prog_info_set_offset_u32(&info_linear->info, + desc->count_offset, count); + bpf_prog_info_set_offset_u32(&info_linear->info, + desc->size_offset, size); + bpf_prog_info_set_offset_u64(&info_linear->info, + desc->array_offset, + ptr_to_u64(ptr)); + ptr += count * size; + } + + /* step 5: call syscall again to get required arrays */ + err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len); + if (err) { + pr_debug("can't get prog info: %s", strerror(errno)); + free(info_linear); + return ERR_PTR(-EFAULT); + } + + /* step 6: verify the data */ + for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { + struct bpf_prog_info_array_desc *desc; + __u32 v1, v2; + + if ((arrays & (1UL << i)) == 0) + continue; + + desc = bpf_prog_info_array_desc + i; + v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset); + v2 = bpf_prog_info_read_offset_u32(&info_linear->info, + desc->count_offset); + if (v1 != v2) + pr_warn("%s: mismatch in element count\n", __func__); + + v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset); + v2 = bpf_prog_info_read_offset_u32(&info_linear->info, + desc->size_offset); + if (v1 != v2) + pr_warn("%s: mismatch in rec size\n", __func__); + } + + /* step 7: update info_len and data_len */ + info_linear->info_len = sizeof(struct bpf_prog_info); + info_linear->data_len = data_len; + + return info_linear; +} + +void bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear) +{ + int i; + + for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { + struct bpf_prog_info_array_desc *desc; + __u64 addr, offs; + + if ((info_linear->arrays & (1UL << i)) == 0) + continue; + + desc = bpf_prog_info_array_desc + i; + addr = bpf_prog_info_read_offset_u64(&info_linear->info, + desc->array_offset); + offs = addr - ptr_to_u64(info_linear->data); + bpf_prog_info_set_offset_u64(&info_linear->info, + desc->array_offset, offs); + } +} + +void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear) +{ + int i; + + for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { + struct bpf_prog_info_array_desc *desc; + __u64 addr, offs; + + if ((info_linear->arrays & (1UL << i)) == 0) + continue; + + desc = bpf_prog_info_array_desc + i; + offs = bpf_prog_info_read_offset_u64(&info_linear->info, + desc->array_offset); + addr = offs + ptr_to_u64(info_linear->data); + bpf_prog_info_set_offset_u64(&info_linear->info, + desc->array_offset, addr); + } +} + +int libbpf_num_possible_cpus(void) +{ + static const char *fcpu = "/sys/devices/system/cpu/possible"; + int len = 0, n = 0, il = 0, ir = 0; + unsigned int start = 0, end = 0; + int tmp_cpus = 0; + static int cpus; + char buf[128]; + int error = 0; + int fd = -1; + + tmp_cpus = READ_ONCE(cpus); + if (tmp_cpus > 0) + return tmp_cpus; + + fd = open(fcpu, O_RDONLY); + if (fd < 0) { + error = errno; + pr_warn("Failed to open file %s: %s\n", fcpu, strerror(error)); + return -error; + } + len = read(fd, buf, sizeof(buf)); + close(fd); + if (len <= 0) { + error = len ? errno : EINVAL; + pr_warn("Failed to read # of possible cpus from %s: %s\n", + fcpu, strerror(error)); + return -error; + } + if (len == sizeof(buf)) { + pr_warn("File %s size overflow\n", fcpu); + return -EOVERFLOW; + } + buf[len] = '\0'; + + for (ir = 0, tmp_cpus = 0; ir <= len; ir++) { + /* Each sub string separated by ',' has format \d+-\d+ or \d+ */ + if (buf[ir] == ',' || buf[ir] == '\0') { + buf[ir] = '\0'; + n = sscanf(&buf[il], "%u-%u", &start, &end); + if (n <= 0) { + pr_warn("Failed to get # CPUs from %s\n", + &buf[il]); + return -EINVAL; + } else if (n == 1) { + end = start; + } + tmp_cpus += end - start + 1; + il = ir + 1; + } + } + if (tmp_cpus <= 0) { + pr_warn("Invalid #CPUs %d from %s\n", tmp_cpus, fcpu); + return -EINVAL; + } + + WRITE_ONCE(cpus, tmp_cpus); + return tmp_cpus; +} diff --git a/src/contrib/libbpf/bpf/libbpf.h b/src/contrib/libbpf/bpf/libbpf.h new file mode 100644 index 0000000..0dbf4bf --- /dev/null +++ b/src/contrib/libbpf/bpf/libbpf.h @@ -0,0 +1,637 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +/* + * Common eBPF ELF object loading operations. + * + * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org> + * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> + * Copyright (C) 2015 Huawei Inc. + */ +#ifndef __LIBBPF_LIBBPF_H +#define __LIBBPF_LIBBPF_H + +#include <stdarg.h> +#include <stdio.h> +#include <stdint.h> +#include <stdbool.h> +#include <sys/types.h> // for size_t +#include <linux/bpf.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef LIBBPF_API +#define LIBBPF_API __attribute__((visibility("default"))) +#endif + +enum libbpf_errno { + __LIBBPF_ERRNO__START = 4000, + + /* Something wrong in libelf */ + LIBBPF_ERRNO__LIBELF = __LIBBPF_ERRNO__START, + LIBBPF_ERRNO__FORMAT, /* BPF object format invalid */ + LIBBPF_ERRNO__KVERSION, /* Incorrect or no 'version' section */ + LIBBPF_ERRNO__ENDIAN, /* Endian mismatch */ + LIBBPF_ERRNO__INTERNAL, /* Internal error in libbpf */ + LIBBPF_ERRNO__RELOC, /* Relocation failed */ + LIBBPF_ERRNO__LOAD, /* Load program failure for unknown reason */ + LIBBPF_ERRNO__VERIFY, /* Kernel verifier blocks program loading */ + LIBBPF_ERRNO__PROG2BIG, /* Program too big */ + LIBBPF_ERRNO__KVER, /* Incorrect kernel version */ + LIBBPF_ERRNO__PROGTYPE, /* Kernel doesn't support this program type */ + LIBBPF_ERRNO__WRNGPID, /* Wrong pid in netlink message */ + LIBBPF_ERRNO__INVSEQ, /* Invalid netlink sequence */ + LIBBPF_ERRNO__NLPARSE, /* netlink parsing error */ + __LIBBPF_ERRNO__END, +}; + +LIBBPF_API int libbpf_strerror(int err, char *buf, size_t size); + +enum libbpf_print_level { + LIBBPF_WARN, + LIBBPF_INFO, + LIBBPF_DEBUG, +}; + +typedef int (*libbpf_print_fn_t)(enum libbpf_print_level level, + const char *, va_list ap); + +LIBBPF_API libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn); + +/* Hide internal to user */ +struct bpf_object; + +struct bpf_object_open_attr { + const char *file; + enum bpf_prog_type prog_type; +}; + +/* Helper macro to declare and initialize libbpf options struct + * + * This dance with uninitialized declaration, followed by memset to zero, + * followed by assignment using compound literal syntax is done to preserve + * ability to use a nice struct field initialization syntax and **hopefully** + * have all the padding bytes initialized to zero. It's not guaranteed though, + * when copying literal, that compiler won't copy garbage in literal's padding + * bytes, but that's the best way I've found and it seems to work in practice. + * + * Macro declares opts struct of given type and name, zero-initializes, + * including any extra padding, it with memset() and then assigns initial + * values provided by users in struct initializer-syntax as varargs. + */ +#define DECLARE_LIBBPF_OPTS(TYPE, NAME, ...) \ + struct TYPE NAME = ({ \ + memset(&NAME, 0, sizeof(struct TYPE)); \ + (struct TYPE) { \ + .sz = sizeof(struct TYPE), \ + __VA_ARGS__ \ + }; \ + }) + +struct bpf_object_open_opts { + /* size of this struct, for forward/backward compatiblity */ + size_t sz; + /* object name override, if provided: + * - for object open from file, this will override setting object + * name from file path's base name; + * - for object open from memory buffer, this will specify an object + * name and will override default "<addr>-<buf-size>" name; + */ + const char *object_name; + /* parse map definitions non-strictly, allowing extra attributes/data */ + bool relaxed_maps; + /* process CO-RE relocations non-strictly, allowing them to fail */ + bool relaxed_core_relocs; + /* maps that set the 'pinning' attribute in their definition will have + * their pin_path attribute set to a file in this directory, and be + * auto-pinned to that path on load; defaults to "/sys/fs/bpf". + */ + const char *pin_root_path; + __u32 attach_prog_fd; +}; +#define bpf_object_open_opts__last_field attach_prog_fd + +LIBBPF_API struct bpf_object *bpf_object__open(const char *path); +LIBBPF_API struct bpf_object * +bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts); +LIBBPF_API struct bpf_object * +bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, + struct bpf_object_open_opts *opts); + +/* deprecated bpf_object__open variants */ +LIBBPF_API struct bpf_object * +bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz, + const char *name); +LIBBPF_API struct bpf_object * +bpf_object__open_xattr(struct bpf_object_open_attr *attr); + +int bpf_object__section_size(const struct bpf_object *obj, const char *name, + __u32 *size); +int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, + __u32 *off); + +enum libbpf_pin_type { + LIBBPF_PIN_NONE, + /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ + LIBBPF_PIN_BY_NAME, +}; + +/* pin_maps and unpin_maps can both be called with a NULL path, in which case + * they will use the pin_path attribute of each map (and ignore all maps that + * don't have a pin_path set). + */ +LIBBPF_API int bpf_object__pin_maps(struct bpf_object *obj, const char *path); +LIBBPF_API int bpf_object__unpin_maps(struct bpf_object *obj, + const char *path); +LIBBPF_API int bpf_object__pin_programs(struct bpf_object *obj, + const char *path); +LIBBPF_API int bpf_object__unpin_programs(struct bpf_object *obj, + const char *path); +LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path); +LIBBPF_API void bpf_object__close(struct bpf_object *object); + +struct bpf_object_load_attr { + struct bpf_object *obj; + int log_level; + const char *target_btf_path; +}; + +/* Load/unload object into/from kernel */ +LIBBPF_API int bpf_object__load(struct bpf_object *obj); +LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr); +LIBBPF_API int bpf_object__unload(struct bpf_object *obj); +LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj); +LIBBPF_API unsigned int bpf_object__kversion(const struct bpf_object *obj); + +struct btf; +LIBBPF_API struct btf *bpf_object__btf(const struct bpf_object *obj); +LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj); + +LIBBPF_API struct bpf_program * +bpf_object__find_program_by_title(const struct bpf_object *obj, + const char *title); + +LIBBPF_API struct bpf_object *bpf_object__next(struct bpf_object *prev); +#define bpf_object__for_each_safe(pos, tmp) \ + for ((pos) = bpf_object__next(NULL), \ + (tmp) = bpf_object__next(pos); \ + (pos) != NULL; \ + (pos) = (tmp), (tmp) = bpf_object__next(tmp)) + +typedef void (*bpf_object_clear_priv_t)(struct bpf_object *, void *); +LIBBPF_API int bpf_object__set_priv(struct bpf_object *obj, void *priv, + bpf_object_clear_priv_t clear_priv); +LIBBPF_API void *bpf_object__priv(const struct bpf_object *prog); + +LIBBPF_API int +libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, + enum bpf_attach_type *expected_attach_type); +LIBBPF_API int libbpf_attach_type_by_name(const char *name, + enum bpf_attach_type *attach_type); +LIBBPF_API int libbpf_find_vmlinux_btf_id(const char *name, + enum bpf_attach_type attach_type); + +/* Accessors of bpf_program */ +struct bpf_program; +LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog, + const struct bpf_object *obj); + +#define bpf_object__for_each_program(pos, obj) \ + for ((pos) = bpf_program__next(NULL, (obj)); \ + (pos) != NULL; \ + (pos) = bpf_program__next((pos), (obj))) + +LIBBPF_API struct bpf_program *bpf_program__prev(struct bpf_program *prog, + const struct bpf_object *obj); + +typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, void *); + +LIBBPF_API int bpf_program__set_priv(struct bpf_program *prog, void *priv, + bpf_program_clear_priv_t clear_priv); + +LIBBPF_API void *bpf_program__priv(const struct bpf_program *prog); +LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog, + __u32 ifindex); + +LIBBPF_API const char *bpf_program__title(const struct bpf_program *prog, + bool needs_copy); + +/* returns program size in bytes */ +LIBBPF_API size_t bpf_program__size(const struct bpf_program *prog); + +LIBBPF_API int bpf_program__load(struct bpf_program *prog, char *license, + __u32 kern_version); +LIBBPF_API int bpf_program__fd(const struct bpf_program *prog); +LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog, + const char *path, + int instance); +LIBBPF_API int bpf_program__unpin_instance(struct bpf_program *prog, + const char *path, + int instance); +LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path); +LIBBPF_API int bpf_program__unpin(struct bpf_program *prog, const char *path); +LIBBPF_API void bpf_program__unload(struct bpf_program *prog); + +struct bpf_link; + +LIBBPF_API int bpf_link__destroy(struct bpf_link *link); + +LIBBPF_API struct bpf_link * +bpf_program__attach_perf_event(struct bpf_program *prog, int pfd); +LIBBPF_API struct bpf_link * +bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe, + const char *func_name); +LIBBPF_API struct bpf_link * +bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe, + pid_t pid, const char *binary_path, + size_t func_offset); +LIBBPF_API struct bpf_link * +bpf_program__attach_tracepoint(struct bpf_program *prog, + const char *tp_category, + const char *tp_name); +LIBBPF_API struct bpf_link * +bpf_program__attach_raw_tracepoint(struct bpf_program *prog, + const char *tp_name); + +LIBBPF_API struct bpf_link * +bpf_program__attach_trace(struct bpf_program *prog); +struct bpf_insn; + +/* + * Libbpf allows callers to adjust BPF programs before being loaded + * into kernel. One program in an object file can be transformed into + * multiple variants to be attached to different hooks. + * + * bpf_program_prep_t, bpf_program__set_prep and bpf_program__nth_fd + * form an API for this purpose. + * + * - bpf_program_prep_t: + * Defines a 'preprocessor', which is a caller defined function + * passed to libbpf through bpf_program__set_prep(), and will be + * called before program is loaded. The processor should adjust + * the program one time for each instance according to the instance id + * passed to it. + * + * - bpf_program__set_prep: + * Attaches a preprocessor to a BPF program. The number of instances + * that should be created is also passed through this function. + * + * - bpf_program__nth_fd: + * After the program is loaded, get resulting FD of a given instance + * of the BPF program. + * + * If bpf_program__set_prep() is not used, the program would be loaded + * without adjustment during bpf_object__load(). The program has only + * one instance. In this case bpf_program__fd(prog) is equal to + * bpf_program__nth_fd(prog, 0). + */ + +struct bpf_prog_prep_result { + /* + * If not NULL, load new instruction array. + * If set to NULL, don't load this instance. + */ + struct bpf_insn *new_insn_ptr; + int new_insn_cnt; + + /* If not NULL, result FD is written to it. */ + int *pfd; +}; + +/* + * Parameters of bpf_program_prep_t: + * - prog: The bpf_program being loaded. + * - n: Index of instance being generated. + * - insns: BPF instructions array. + * - insns_cnt:Number of instructions in insns. + * - res: Output parameter, result of transformation. + * + * Return value: + * - Zero: pre-processing success. + * - Non-zero: pre-processing error, stop loading. + */ +typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n, + struct bpf_insn *insns, int insns_cnt, + struct bpf_prog_prep_result *res); + +LIBBPF_API int bpf_program__set_prep(struct bpf_program *prog, int nr_instance, + bpf_program_prep_t prep); + +LIBBPF_API int bpf_program__nth_fd(const struct bpf_program *prog, int n); + +/* + * Adjust type of BPF program. Default is kprobe. + */ +LIBBPF_API int bpf_program__set_socket_filter(struct bpf_program *prog); +LIBBPF_API int bpf_program__set_tracepoint(struct bpf_program *prog); +LIBBPF_API int bpf_program__set_raw_tracepoint(struct bpf_program *prog); +LIBBPF_API int bpf_program__set_kprobe(struct bpf_program *prog); +LIBBPF_API int bpf_program__set_sched_cls(struct bpf_program *prog); +LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog); +LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog); +LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog); +LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog); + +LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog); +LIBBPF_API void bpf_program__set_type(struct bpf_program *prog, + enum bpf_prog_type type); + +LIBBPF_API enum bpf_attach_type +bpf_program__get_expected_attach_type(struct bpf_program *prog); +LIBBPF_API void +bpf_program__set_expected_attach_type(struct bpf_program *prog, + enum bpf_attach_type type); + +LIBBPF_API bool bpf_program__is_socket_filter(const struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_tracepoint(const struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_raw_tracepoint(const struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_kprobe(const struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_sched_cls(const struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog); + +/* + * No need for __attribute__((packed)), all members of 'bpf_map_def' + * are all aligned. In addition, using __attribute__((packed)) + * would trigger a -Wpacked warning message, and lead to an error + * if -Werror is set. + */ +struct bpf_map_def { + unsigned int type; + unsigned int key_size; + unsigned int value_size; + unsigned int max_entries; + unsigned int map_flags; +}; + +/* + * The 'struct bpf_map' in include/linux/bpf.h is internal to the kernel, + * so no need to worry about a name clash. + */ +struct bpf_map; +LIBBPF_API struct bpf_map * +bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name); + +LIBBPF_API int +bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name); + +/* + * Get bpf_map through the offset of corresponding struct bpf_map_def + * in the BPF object file. + */ +LIBBPF_API struct bpf_map * +bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset); + +LIBBPF_API struct bpf_map * +bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj); +#define bpf_object__for_each_map(pos, obj) \ + for ((pos) = bpf_map__next(NULL, (obj)); \ + (pos) != NULL; \ + (pos) = bpf_map__next((pos), (obj))) +#define bpf_map__for_each bpf_object__for_each_map + +LIBBPF_API struct bpf_map * +bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj); + +LIBBPF_API int bpf_map__fd(const struct bpf_map *map); +LIBBPF_API const struct bpf_map_def *bpf_map__def(const struct bpf_map *map); +LIBBPF_API const char *bpf_map__name(const struct bpf_map *map); +LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map); +LIBBPF_API __u32 bpf_map__btf_value_type_id(const struct bpf_map *map); + +typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *); +LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv, + bpf_map_clear_priv_t clear_priv); +LIBBPF_API void *bpf_map__priv(const struct bpf_map *map); +LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); +LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries); +LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map); +LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map); +LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); +LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path); +LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map); +LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map); +LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path); +LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path); + +LIBBPF_API int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd); + +LIBBPF_API long libbpf_get_error(const void *ptr); + +struct bpf_prog_load_attr { + const char *file; + enum bpf_prog_type prog_type; + enum bpf_attach_type expected_attach_type; + int ifindex; + int log_level; + int prog_flags; +}; + +LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, + struct bpf_object **pobj, int *prog_fd); +LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type, + struct bpf_object **pobj, int *prog_fd); + +struct xdp_link_info { + __u32 prog_id; + __u32 drv_prog_id; + __u32 hw_prog_id; + __u32 skb_prog_id; + __u8 attach_mode; +}; + +LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); +LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags); +LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, + size_t info_size, __u32 flags); + +struct perf_buffer; + +typedef void (*perf_buffer_sample_fn)(void *ctx, int cpu, + void *data, __u32 size); +typedef void (*perf_buffer_lost_fn)(void *ctx, int cpu, __u64 cnt); + +/* common use perf buffer options */ +struct perf_buffer_opts { + /* if specified, sample_cb is called for each sample */ + perf_buffer_sample_fn sample_cb; + /* if specified, lost_cb is called for each batch of lost samples */ + perf_buffer_lost_fn lost_cb; + /* ctx is provided to sample_cb and lost_cb */ + void *ctx; +}; + +LIBBPF_API struct perf_buffer * +perf_buffer__new(int map_fd, size_t page_cnt, + const struct perf_buffer_opts *opts); + +enum bpf_perf_event_ret { + LIBBPF_PERF_EVENT_DONE = 0, + LIBBPF_PERF_EVENT_ERROR = -1, + LIBBPF_PERF_EVENT_CONT = -2, +}; + +struct perf_event_header; + +typedef enum bpf_perf_event_ret +(*perf_buffer_event_fn)(void *ctx, int cpu, struct perf_event_header *event); + +/* raw perf buffer options, giving most power and control */ +struct perf_buffer_raw_opts { + /* perf event attrs passed directly into perf_event_open() */ + struct perf_event_attr *attr; + /* raw event callback */ + perf_buffer_event_fn event_cb; + /* ctx is provided to event_cb */ + void *ctx; + /* if cpu_cnt == 0, open all on all possible CPUs (up to the number of + * max_entries of given PERF_EVENT_ARRAY map) + */ + int cpu_cnt; + /* if cpu_cnt > 0, cpus is an array of CPUs to open ring buffers on */ + int *cpus; + /* if cpu_cnt > 0, map_keys specify map keys to set per-CPU FDs for */ + int *map_keys; +}; + +LIBBPF_API struct perf_buffer * +perf_buffer__new_raw(int map_fd, size_t page_cnt, + const struct perf_buffer_raw_opts *opts); + +LIBBPF_API void perf_buffer__free(struct perf_buffer *pb); +LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms); + +typedef enum bpf_perf_event_ret + (*bpf_perf_event_print_t)(struct perf_event_header *hdr, + void *private_data); +LIBBPF_API enum bpf_perf_event_ret +bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, + void **copy_mem, size_t *copy_size, + bpf_perf_event_print_t fn, void *private_data); + +struct nlattr; +typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb); +int libbpf_netlink_open(unsigned int *nl_pid); +int libbpf_nl_get_link(int sock, unsigned int nl_pid, + libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie); +int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex, + libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie); +int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex, + libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie); +int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle, + libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie); + +struct bpf_prog_linfo; +struct bpf_prog_info; + +LIBBPF_API void bpf_prog_linfo__free(struct bpf_prog_linfo *prog_linfo); +LIBBPF_API struct bpf_prog_linfo * +bpf_prog_linfo__new(const struct bpf_prog_info *info); +LIBBPF_API const struct bpf_line_info * +bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo, + __u64 addr, __u32 func_idx, __u32 nr_skip); +LIBBPF_API const struct bpf_line_info * +bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo, + __u32 insn_off, __u32 nr_skip); + +/* + * Probe for supported system features + * + * Note that running many of these probes in a short amount of time can cause + * the kernel to reach the maximal size of lockable memory allowed for the + * user, causing subsequent probes to fail. In this case, the caller may want + * to adjust that limit with setrlimit(). + */ +LIBBPF_API bool bpf_probe_prog_type(enum bpf_prog_type prog_type, + __u32 ifindex); +LIBBPF_API bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex); +LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id, + enum bpf_prog_type prog_type, __u32 ifindex); + +/* + * Get bpf_prog_info in continuous memory + * + * struct bpf_prog_info has multiple arrays. The user has option to choose + * arrays to fetch from kernel. The following APIs provide an uniform way to + * fetch these data. All arrays in bpf_prog_info are stored in a single + * continuous memory region. This makes it easy to store the info in a + * file. + * + * Before writing bpf_prog_info_linear to files, it is necessary to + * translate pointers in bpf_prog_info to offsets. Helper functions + * bpf_program__bpil_addr_to_offs() and bpf_program__bpil_offs_to_addr() + * are introduced to switch between pointers and offsets. + * + * Examples: + * # To fetch map_ids and prog_tags: + * __u64 arrays = (1UL << BPF_PROG_INFO_MAP_IDS) | + * (1UL << BPF_PROG_INFO_PROG_TAGS); + * struct bpf_prog_info_linear *info_linear = + * bpf_program__get_prog_info_linear(fd, arrays); + * + * # To save data in file + * bpf_program__bpil_addr_to_offs(info_linear); + * write(f, info_linear, sizeof(*info_linear) + info_linear->data_len); + * + * # To read data from file + * read(f, info_linear, <proper_size>); + * bpf_program__bpil_offs_to_addr(info_linear); + */ +enum bpf_prog_info_array { + BPF_PROG_INFO_FIRST_ARRAY = 0, + BPF_PROG_INFO_JITED_INSNS = 0, + BPF_PROG_INFO_XLATED_INSNS, + BPF_PROG_INFO_MAP_IDS, + BPF_PROG_INFO_JITED_KSYMS, + BPF_PROG_INFO_JITED_FUNC_LENS, + BPF_PROG_INFO_FUNC_INFO, + BPF_PROG_INFO_LINE_INFO, + BPF_PROG_INFO_JITED_LINE_INFO, + BPF_PROG_INFO_PROG_TAGS, + BPF_PROG_INFO_LAST_ARRAY, +}; + +struct bpf_prog_info_linear { + /* size of struct bpf_prog_info, when the tool is compiled */ + __u32 info_len; + /* total bytes allocated for data, round up to 8 bytes */ + __u32 data_len; + /* which arrays are included in data */ + __u64 arrays; + struct bpf_prog_info info; + __u8 data[]; +}; + +LIBBPF_API struct bpf_prog_info_linear * +bpf_program__get_prog_info_linear(int fd, __u64 arrays); + +LIBBPF_API void +bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear); + +LIBBPF_API void +bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear); + +/* + * A helper function to get the number of possible CPUs before looking up + * per-CPU maps. Negative errno is returned on failure. + * + * Example usage: + * + * int ncpus = libbpf_num_possible_cpus(); + * if (ncpus < 0) { + * // error handling + * } + * long values[ncpus]; + * bpf_map_lookup_elem(per_cpu_map_fd, key, values); + * + */ +LIBBPF_API int libbpf_num_possible_cpus(void); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __LIBBPF_LIBBPF_H */ diff --git a/src/contrib/libbpf/bpf/libbpf_errno.c b/src/contrib/libbpf/bpf/libbpf_errno.c new file mode 100644 index 0000000..4343e40 --- /dev/null +++ b/src/contrib/libbpf/bpf/libbpf_errno.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +/* + * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org> + * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> + * Copyright (C) 2015 Huawei Inc. + * Copyright (C) 2017 Nicira, Inc. + */ + +#undef _GNU_SOURCE +#include <stdio.h> +#include <string.h> + +#include "libbpf.h" + +#define ERRNO_OFFSET(e) ((e) - __LIBBPF_ERRNO__START) +#define ERRCODE_OFFSET(c) ERRNO_OFFSET(LIBBPF_ERRNO__##c) +#define NR_ERRNO (__LIBBPF_ERRNO__END - __LIBBPF_ERRNO__START) + +static const char *libbpf_strerror_table[NR_ERRNO] = { + [ERRCODE_OFFSET(LIBELF)] = "Something wrong in libelf", + [ERRCODE_OFFSET(FORMAT)] = "BPF object format invalid", + [ERRCODE_OFFSET(KVERSION)] = "'version' section incorrect or lost", + [ERRCODE_OFFSET(ENDIAN)] = "Endian mismatch", + [ERRCODE_OFFSET(INTERNAL)] = "Internal error in libbpf", + [ERRCODE_OFFSET(RELOC)] = "Relocation failed", + [ERRCODE_OFFSET(VERIFY)] = "Kernel verifier blocks program loading", + [ERRCODE_OFFSET(PROG2BIG)] = "Program too big", + [ERRCODE_OFFSET(KVER)] = "Incorrect kernel version", + [ERRCODE_OFFSET(PROGTYPE)] = "Kernel doesn't support this program type", + [ERRCODE_OFFSET(WRNGPID)] = "Wrong pid in netlink message", + [ERRCODE_OFFSET(INVSEQ)] = "Invalid netlink sequence", + [ERRCODE_OFFSET(NLPARSE)] = "Incorrect netlink message parsing", +}; + +int libbpf_strerror(int err, char *buf, size_t size) +{ + if (!buf || !size) + return -1; + + err = err > 0 ? err : -err; + + if (err < __LIBBPF_ERRNO__START) { + int ret; + + ret = strerror_r(err, buf, size); + buf[size - 1] = '\0'; + return ret; + } + + if (err < __LIBBPF_ERRNO__END) { + const char *msg; + + msg = libbpf_strerror_table[ERRNO_OFFSET(err)]; + snprintf(buf, size, "%s", msg); + buf[size - 1] = '\0'; + return 0; + } + + snprintf(buf, size, "Unknown libbpf error %d", err); + buf[size - 1] = '\0'; + return -1; +} diff --git a/src/contrib/libbpf/bpf/libbpf_internal.h b/src/contrib/libbpf/bpf/libbpf_internal.h new file mode 100644 index 0000000..97ac17a --- /dev/null +++ b/src/contrib/libbpf/bpf/libbpf_internal.h @@ -0,0 +1,217 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +/* + * Internal libbpf helpers. + * + * Copyright (c) 2019 Facebook + */ + +#ifndef __LIBBPF_LIBBPF_INTERNAL_H +#define __LIBBPF_LIBBPF_INTERNAL_H + +#include "libbpf.h" + +#define BTF_INFO_ENC(kind, kind_flag, vlen) \ + ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) +#define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type) +#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \ + ((encoding) << 24 | (bits_offset) << 16 | (nr_bits)) +#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \ + BTF_INT_ENC(encoding, bits_offset, bits) +#define BTF_MEMBER_ENC(name, type, bits_offset) (name), (type), (bits_offset) +#define BTF_PARAM_ENC(name, type) (name), (type) +#define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size) + +#ifndef min +# define min(x, y) ((x) < (y) ? (x) : (y)) +#endif +#ifndef max +# define max(x, y) ((x) < (y) ? (y) : (x)) +#endif +#ifndef offsetofend +# define offsetofend(TYPE, FIELD) \ + (offsetof(TYPE, FIELD) + sizeof(((TYPE *)0)->FIELD)) +#endif + +/* Symbol versioning is different between static and shared library. + * Properly versioned symbols are needed for shared library, but + * only the symbol of the new version is needed for static library. + */ +#ifdef SHARED +# define COMPAT_VERSION(internal_name, api_name, version) \ + asm(".symver " #internal_name "," #api_name "@" #version); +# define DEFAULT_VERSION(internal_name, api_name, version) \ + asm(".symver " #internal_name "," #api_name "@@" #version); +#else +# define COMPAT_VERSION(internal_name, api_name, version) +# define DEFAULT_VERSION(internal_name, api_name, version) \ + extern typeof(internal_name) api_name \ + __attribute__((alias(#internal_name))); +#endif + +extern void libbpf_print(enum libbpf_print_level level, + const char *format, ...) + __attribute__((format(printf, 2, 3))); + +#define __pr(level, fmt, ...) \ +do { \ + libbpf_print(level, "libbpf: " fmt, ##__VA_ARGS__); \ +} while (0) + +#define pr_warn(fmt, ...) __pr(LIBBPF_WARN, fmt, ##__VA_ARGS__) +#define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__) +#define pr_debug(fmt, ...) __pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__) + +static inline bool libbpf_validate_opts(const char *opts, + size_t opts_sz, size_t user_sz, + const char *type_name) +{ + if (user_sz < sizeof(size_t)) { + pr_warn("%s size (%zu) is too small\n", type_name, user_sz); + return false; + } + if (user_sz > opts_sz) { + size_t i; + + for (i = opts_sz; i < user_sz; i++) { + if (opts[i]) { + pr_warn("%s has non-zero extra bytes", + type_name); + return false; + } + } + } + return true; +} + +#define OPTS_VALID(opts, type) \ + (!(opts) || libbpf_validate_opts((const char *)opts, \ + offsetofend(struct type, \ + type##__last_field), \ + (opts)->sz, #type)) +#define OPTS_HAS(opts, field) \ + ((opts) && opts->sz >= offsetofend(typeof(*(opts)), field)) +#define OPTS_GET(opts, field, fallback_value) \ + (OPTS_HAS(opts, field) ? (opts)->field : fallback_value) + +int libbpf__load_raw_btf(const char *raw_types, size_t types_len, + const char *str_sec, size_t str_len); + +struct btf_ext_info { + /* + * info points to the individual info section (e.g. func_info and + * line_info) from the .BTF.ext. It does not include the __u32 rec_size. + */ + void *info; + __u32 rec_size; + __u32 len; +}; + +#define for_each_btf_ext_sec(seg, sec) \ + for (sec = (seg)->info; \ + (void *)sec < (seg)->info + (seg)->len; \ + sec = (void *)sec + sizeof(struct btf_ext_info_sec) + \ + (seg)->rec_size * sec->num_info) + +#define for_each_btf_ext_rec(seg, sec, i, rec) \ + for (i = 0, rec = (void *)&(sec)->data; \ + i < (sec)->num_info; \ + i++, rec = (void *)rec + (seg)->rec_size) + +struct btf_ext { + union { + struct btf_ext_header *hdr; + void *data; + }; + struct btf_ext_info func_info; + struct btf_ext_info line_info; + struct btf_ext_info field_reloc_info; + __u32 data_size; +}; + +struct btf_ext_info_sec { + __u32 sec_name_off; + __u32 num_info; + /* Followed by num_info * record_size number of bytes */ + __u8 data[0]; +}; + +/* The minimum bpf_func_info checked by the loader */ +struct bpf_func_info_min { + __u32 insn_off; + __u32 type_id; +}; + +/* The minimum bpf_line_info checked by the loader */ +struct bpf_line_info_min { + __u32 insn_off; + __u32 file_name_off; + __u32 line_off; + __u32 line_col; +}; + +/* bpf_field_info_kind encodes which aspect of captured field has to be + * adjusted by relocations. Currently supported values are: + * - BPF_FIELD_BYTE_OFFSET: field offset (in bytes); + * - BPF_FIELD_EXISTS: field existence (1, if field exists; 0, otherwise); + */ +enum bpf_field_info_kind { + BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */ + BPF_FIELD_BYTE_SIZE = 1, + BPF_FIELD_EXISTS = 2, /* field existence in target kernel */ + BPF_FIELD_SIGNED = 3, + BPF_FIELD_LSHIFT_U64 = 4, + BPF_FIELD_RSHIFT_U64 = 5, +}; + +/* The minimum bpf_field_reloc checked by the loader + * + * Field relocation captures the following data: + * - insn_off - instruction offset (in bytes) within a BPF program that needs + * its insn->imm field to be relocated with actual field info; + * - type_id - BTF type ID of the "root" (containing) entity of a relocatable + * field; + * - access_str_off - offset into corresponding .BTF string section. String + * itself encodes an accessed field using a sequence of field and array + * indicies, separated by colon (:). It's conceptually very close to LLVM's + * getelementptr ([0]) instruction's arguments for identifying offset to + * a field. + * + * Example to provide a better feel. + * + * struct sample { + * int a; + * struct { + * int b[10]; + * }; + * }; + * + * struct sample *s = ...; + * int x = &s->a; // encoded as "0:0" (a is field #0) + * int y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1, + * // b is field #0 inside anon struct, accessing elem #5) + * int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array) + * + * type_id for all relocs in this example will capture BTF type id of + * `struct sample`. + * + * Such relocation is emitted when using __builtin_preserve_access_index() + * Clang built-in, passing expression that captures field address, e.g.: + * + * bpf_probe_read(&dst, sizeof(dst), + * __builtin_preserve_access_index(&src->a.b.c)); + * + * In this case Clang will emit field relocation recording necessary data to + * be able to find offset of embedded `a.b.c` field within `src` struct. + * + * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction + */ +struct bpf_field_reloc { + __u32 insn_off; + __u32 type_id; + __u32 access_str_off; + enum bpf_field_info_kind kind; +}; + +#endif /* __LIBBPF_LIBBPF_INTERNAL_H */ diff --git a/src/contrib/libbpf/bpf/libbpf_probes.c b/src/contrib/libbpf/bpf/libbpf_probes.c new file mode 100644 index 0000000..a9eb8b3 --- /dev/null +++ b/src/contrib/libbpf/bpf/libbpf_probes.c @@ -0,0 +1,323 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2019 Netronome Systems, Inc. */ + +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#include <net/if.h> +#include <sys/utsname.h> + +#include <linux/btf.h> +#include <linux/filter.h> +#include <linux/kernel.h> + +#include "bpf.h" +#include "libbpf.h" +#include "libbpf_internal.h" + +static bool grep(const char *buffer, const char *pattern) +{ + return !!strstr(buffer, pattern); +} + +static int get_vendor_id(int ifindex) +{ + char ifname[IF_NAMESIZE], path[64], buf[8]; + ssize_t len; + int fd; + + if (!if_indextoname(ifindex, ifname)) + return -1; + + snprintf(path, sizeof(path), "/sys/class/net/%s/device/vendor", ifname); + + fd = open(path, O_RDONLY); + if (fd < 0) + return -1; + + len = read(fd, buf, sizeof(buf)); + close(fd); + if (len < 0) + return -1; + if (len >= (ssize_t)sizeof(buf)) + return -1; + buf[len] = '\0'; + + return strtol(buf, NULL, 0); +} + +static int get_kernel_version(void) +{ + int version, subversion, patchlevel; + struct utsname utsn; + + /* Return 0 on failure, and attempt to probe with empty kversion */ + if (uname(&utsn)) + return 0; + + if (sscanf(utsn.release, "%d.%d.%d", + &version, &subversion, &patchlevel) != 3) + return 0; + + return (version << 16) + (subversion << 8) + patchlevel; +} + +static void +probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, + size_t insns_cnt, char *buf, size_t buf_len, __u32 ifindex) +{ + struct bpf_load_program_attr xattr = {}; + int fd; + + switch (prog_type) { + case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: + xattr.expected_attach_type = BPF_CGROUP_INET4_CONNECT; + break; + case BPF_PROG_TYPE_KPROBE: + xattr.kern_version = get_kernel_version(); + break; + case BPF_PROG_TYPE_UNSPEC: + case BPF_PROG_TYPE_SOCKET_FILTER: + case BPF_PROG_TYPE_SCHED_CLS: + case BPF_PROG_TYPE_SCHED_ACT: + case BPF_PROG_TYPE_TRACEPOINT: + case BPF_PROG_TYPE_XDP: + case BPF_PROG_TYPE_PERF_EVENT: + case BPF_PROG_TYPE_CGROUP_SKB: + case BPF_PROG_TYPE_CGROUP_SOCK: + case BPF_PROG_TYPE_LWT_IN: + case BPF_PROG_TYPE_LWT_OUT: + case BPF_PROG_TYPE_LWT_XMIT: + case BPF_PROG_TYPE_SOCK_OPS: + case BPF_PROG_TYPE_SK_SKB: + case BPF_PROG_TYPE_CGROUP_DEVICE: + case BPF_PROG_TYPE_SK_MSG: + case BPF_PROG_TYPE_RAW_TRACEPOINT: + case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: + case BPF_PROG_TYPE_LWT_SEG6LOCAL: + case BPF_PROG_TYPE_LIRC_MODE2: + case BPF_PROG_TYPE_SK_REUSEPORT: + case BPF_PROG_TYPE_FLOW_DISSECTOR: + case BPF_PROG_TYPE_CGROUP_SYSCTL: + case BPF_PROG_TYPE_CGROUP_SOCKOPT: + case BPF_PROG_TYPE_TRACING: + default: + break; + } + + xattr.prog_type = prog_type; + xattr.insns = insns; + xattr.insns_cnt = insns_cnt; + xattr.license = "GPL"; + xattr.prog_ifindex = ifindex; + + fd = bpf_load_program_xattr(&xattr, buf, buf_len); + if (fd >= 0) + close(fd); +} + +bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex) +{ + struct bpf_insn insns[2] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN() + }; + + if (ifindex && prog_type == BPF_PROG_TYPE_SCHED_CLS) + /* nfp returns -EINVAL on exit(0) with TC offload */ + insns[0].imm = 2; + + errno = 0; + probe_load(prog_type, insns, ARRAY_SIZE(insns), NULL, 0, ifindex); + + return errno != EINVAL && errno != EOPNOTSUPP; +} + +int libbpf__load_raw_btf(const char *raw_types, size_t types_len, + const char *str_sec, size_t str_len) +{ + struct btf_header hdr = { + .magic = BTF_MAGIC, + .version = BTF_VERSION, + .hdr_len = sizeof(struct btf_header), + .type_len = types_len, + .str_off = types_len, + .str_len = str_len, + }; + int btf_fd, btf_len; + __u8 *raw_btf; + + btf_len = hdr.hdr_len + hdr.type_len + hdr.str_len; + raw_btf = malloc(btf_len); + if (!raw_btf) + return -ENOMEM; + + memcpy(raw_btf, &hdr, sizeof(hdr)); + memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len); + memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len); + + btf_fd = bpf_load_btf(raw_btf, btf_len, NULL, 0, false); + + free(raw_btf); + return btf_fd; +} + +static int load_sk_storage_btf(void) +{ + const char strs[] = "\0bpf_spin_lock\0val\0cnt\0l"; + /* struct bpf_spin_lock { + * int val; + * }; + * struct val { + * int cnt; + * struct bpf_spin_lock l; + * }; + */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* struct bpf_spin_lock */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), + BTF_MEMBER_ENC(15, 1, 0), /* int val; */ + /* struct val */ /* [3] */ + BTF_TYPE_ENC(15, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8), + BTF_MEMBER_ENC(19, 1, 0), /* int cnt; */ + BTF_MEMBER_ENC(23, 2, 32),/* struct bpf_spin_lock l; */ + }; + + return libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs)); +} + +bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) +{ + int key_size, value_size, max_entries, map_flags; + __u32 btf_key_type_id = 0, btf_value_type_id = 0; + struct bpf_create_map_attr attr = {}; + int fd = -1, btf_fd = -1, fd_inner; + + key_size = sizeof(__u32); + value_size = sizeof(__u32); + max_entries = 1; + map_flags = 0; + + switch (map_type) { + case BPF_MAP_TYPE_STACK_TRACE: + value_size = sizeof(__u64); + break; + case BPF_MAP_TYPE_LPM_TRIE: + key_size = sizeof(__u64); + value_size = sizeof(__u64); + map_flags = BPF_F_NO_PREALLOC; + break; + case BPF_MAP_TYPE_CGROUP_STORAGE: + case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: + key_size = sizeof(struct bpf_cgroup_storage_key); + value_size = sizeof(__u64); + max_entries = 0; + break; + case BPF_MAP_TYPE_QUEUE: + case BPF_MAP_TYPE_STACK: + key_size = 0; + break; + case BPF_MAP_TYPE_SK_STORAGE: + btf_key_type_id = 1; + btf_value_type_id = 3; + value_size = 8; + max_entries = 0; + map_flags = BPF_F_NO_PREALLOC; + btf_fd = load_sk_storage_btf(); + if (btf_fd < 0) + return false; + break; + case BPF_MAP_TYPE_UNSPEC: + case BPF_MAP_TYPE_HASH: + case BPF_MAP_TYPE_ARRAY: + case BPF_MAP_TYPE_PROG_ARRAY: + case BPF_MAP_TYPE_PERF_EVENT_ARRAY: + case BPF_MAP_TYPE_PERCPU_HASH: + case BPF_MAP_TYPE_PERCPU_ARRAY: + case BPF_MAP_TYPE_CGROUP_ARRAY: + case BPF_MAP_TYPE_LRU_HASH: + case BPF_MAP_TYPE_LRU_PERCPU_HASH: + case BPF_MAP_TYPE_ARRAY_OF_MAPS: + case BPF_MAP_TYPE_HASH_OF_MAPS: + case BPF_MAP_TYPE_DEVMAP: + case BPF_MAP_TYPE_DEVMAP_HASH: + case BPF_MAP_TYPE_SOCKMAP: + case BPF_MAP_TYPE_CPUMAP: + case BPF_MAP_TYPE_XSKMAP: + case BPF_MAP_TYPE_SOCKHASH: + case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: + default: + break; + } + + if (map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS || + map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { + /* TODO: probe for device, once libbpf has a function to create + * map-in-map for offload + */ + if (ifindex) + return false; + + fd_inner = bpf_create_map(BPF_MAP_TYPE_HASH, + sizeof(__u32), sizeof(__u32), 1, 0); + if (fd_inner < 0) + return false; + fd = bpf_create_map_in_map(map_type, NULL, sizeof(__u32), + fd_inner, 1, 0); + close(fd_inner); + } else { + /* Note: No other restriction on map type probes for offload */ + attr.map_type = map_type; + attr.key_size = key_size; + attr.value_size = value_size; + attr.max_entries = max_entries; + attr.map_flags = map_flags; + attr.map_ifindex = ifindex; + if (btf_fd >= 0) { + attr.btf_fd = btf_fd; + attr.btf_key_type_id = btf_key_type_id; + attr.btf_value_type_id = btf_value_type_id; + } + + fd = bpf_create_map_xattr(&attr); + } + if (fd >= 0) + close(fd); + if (btf_fd >= 0) + close(btf_fd); + + return fd >= 0; +} + +bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type, + __u32 ifindex) +{ + struct bpf_insn insns[2] = { + BPF_EMIT_CALL(id), + BPF_EXIT_INSN() + }; + char buf[4096] = {}; + bool res; + + probe_load(prog_type, insns, ARRAY_SIZE(insns), buf, sizeof(buf), + ifindex); + res = !grep(buf, "invalid func ") && !grep(buf, "unknown func "); + + if (ifindex) { + switch (get_vendor_id(ifindex)) { + case 0x19ee: /* Netronome specific */ + res = res && !grep(buf, "not supported by FW") && + !grep(buf, "unsupported function id"); + break; + default: + break; + } + } + + return res; +} diff --git a/src/contrib/libbpf/bpf/libbpf_util.h b/src/contrib/libbpf/bpf/libbpf_util.h new file mode 100644 index 0000000..59c779c --- /dev/null +++ b/src/contrib/libbpf/bpf/libbpf_util.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +/* Copyright (c) 2019 Facebook */ + +#ifndef __LIBBPF_LIBBPF_UTIL_H +#define __LIBBPF_LIBBPF_UTIL_H + +#include <stdbool.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* Use these barrier functions instead of smp_[rw]mb() when they are + * used in a libbpf header file. That way they can be built into the + * application that uses libbpf. + */ +#if defined(__i386__) || defined(__x86_64__) +# define libbpf_smp_rmb() asm volatile("" : : : "memory") +# define libbpf_smp_wmb() asm volatile("" : : : "memory") +# define libbpf_smp_mb() \ + asm volatile("lock; addl $0,-4(%%rsp)" : : : "memory", "cc") +/* Hinders stores to be observed before older loads. */ +# define libbpf_smp_rwmb() asm volatile("" : : : "memory") +#elif defined(__aarch64__) +# define libbpf_smp_rmb() asm volatile("dmb ishld" : : : "memory") +# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory") +# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory") +# define libbpf_smp_rwmb() libbpf_smp_mb() +#elif defined(__arm__) +/* These are only valid for armv7 and above */ +# define libbpf_smp_rmb() asm volatile("dmb ish" : : : "memory") +# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory") +# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory") +# define libbpf_smp_rwmb() libbpf_smp_mb() +#else +/* Architecture missing native barrier functions. */ +# define libbpf_smp_rmb() __sync_synchronize() +# define libbpf_smp_wmb() __sync_synchronize() +# define libbpf_smp_mb() __sync_synchronize() +# define libbpf_smp_rwmb() __sync_synchronize() +#endif + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/src/contrib/libbpf/bpf/netlink.c b/src/contrib/libbpf/bpf/netlink.c new file mode 100644 index 0000000..5065c1a --- /dev/null +++ b/src/contrib/libbpf/bpf/netlink.c @@ -0,0 +1,451 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2018 Facebook */ + +#include <stdlib.h> +#include <memory.h> +#include <unistd.h> +#include <linux/bpf.h> +#include <linux/rtnetlink.h> +#include <sys/socket.h> +#include <errno.h> +#include <time.h> + +#include "bpf.h" +#include "libbpf.h" +#include "libbpf_internal.h" +#include "nlattr.h" + +#ifndef SOL_NETLINK +#define SOL_NETLINK 270 +#endif + +typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t, + void *cookie); + +struct xdp_id_md { + int ifindex; + __u32 flags; + struct xdp_link_info info; +}; + +int libbpf_netlink_open(__u32 *nl_pid) +{ + struct sockaddr_nl sa; + socklen_t addrlen; + int one = 1, ret; + int sock; + + memset(&sa, 0, sizeof(sa)); + sa.nl_family = AF_NETLINK; + + sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (sock < 0) + return -errno; + + if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK, + &one, sizeof(one)) < 0) { + pr_warn("Netlink error reporting not supported\n"); + } + + if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { + ret = -errno; + goto cleanup; + } + + addrlen = sizeof(sa); + if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) { + ret = -errno; + goto cleanup; + } + + if (addrlen != sizeof(sa)) { + ret = -LIBBPF_ERRNO__INTERNAL; + goto cleanup; + } + + *nl_pid = sa.nl_pid; + return sock; + +cleanup: + close(sock); + return ret; +} + +static int bpf_netlink_recv(int sock, __u32 nl_pid, int seq, + __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn, + void *cookie) +{ + bool multipart = true; + struct nlmsgerr *err; + struct nlmsghdr *nh; + char buf[4096]; + int len, ret; + + while (multipart) { + multipart = false; + len = recv(sock, buf, sizeof(buf), 0); + if (len < 0) { + ret = -errno; + goto done; + } + + if (len == 0) + break; + + for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len); + nh = NLMSG_NEXT(nh, len)) { + if (nh->nlmsg_pid != nl_pid) { + ret = -LIBBPF_ERRNO__WRNGPID; + goto done; + } + if (nh->nlmsg_seq != seq) { + ret = -LIBBPF_ERRNO__INVSEQ; + goto done; + } + if (nh->nlmsg_flags & NLM_F_MULTI) + multipart = true; + switch (nh->nlmsg_type) { + case NLMSG_ERROR: + err = (struct nlmsgerr *)NLMSG_DATA(nh); + if (!err->error) + continue; + ret = err->error; + libbpf_nla_dump_errormsg(nh); + goto done; + case NLMSG_DONE: + return 0; + default: + break; + } + if (_fn) { + ret = _fn(nh, fn, cookie); + if (ret) + return ret; + } + } + } + ret = 0; +done: + return ret; +} + +int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags) +{ + int sock, seq = 0, ret; + struct nlattr *nla, *nla_xdp; + struct { + struct nlmsghdr nh; + struct ifinfomsg ifinfo; + char attrbuf[64]; + } req; + __u32 nl_pid; + + sock = libbpf_netlink_open(&nl_pid); + if (sock < 0) + return sock; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_type = RTM_SETLINK; + req.nh.nlmsg_pid = 0; + req.nh.nlmsg_seq = ++seq; + req.ifinfo.ifi_family = AF_UNSPEC; + req.ifinfo.ifi_index = ifindex; + + /* started nested attribute for XDP */ + nla = (struct nlattr *)(((char *)&req) + + NLMSG_ALIGN(req.nh.nlmsg_len)); + nla->nla_type = NLA_F_NESTED | IFLA_XDP; + nla->nla_len = NLA_HDRLEN; + + /* add XDP fd */ + nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); + nla_xdp->nla_type = IFLA_XDP_FD; + nla_xdp->nla_len = NLA_HDRLEN + sizeof(int); + memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd)); + nla->nla_len += nla_xdp->nla_len; + + /* if user passed in any flags, add those too */ + if (flags) { + nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); + nla_xdp->nla_type = IFLA_XDP_FLAGS; + nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags); + memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags)); + nla->nla_len += nla_xdp->nla_len; + } + + req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len); + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + ret = -errno; + goto cleanup; + } + ret = bpf_netlink_recv(sock, nl_pid, seq, NULL, NULL, NULL); + +cleanup: + close(sock); + return ret; +} + +static int __dump_link_nlmsg(struct nlmsghdr *nlh, + libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie) +{ + struct nlattr *tb[IFLA_MAX + 1], *attr; + struct ifinfomsg *ifi = NLMSG_DATA(nlh); + int len; + + len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi)); + attr = (struct nlattr *) ((void *) ifi + NLMSG_ALIGN(sizeof(*ifi))); + if (libbpf_nla_parse(tb, IFLA_MAX, attr, len, NULL) != 0) + return -LIBBPF_ERRNO__NLPARSE; + + return dump_link_nlmsg(cookie, ifi, tb); +} + +static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb) +{ + struct nlattr *xdp_tb[IFLA_XDP_MAX + 1]; + struct xdp_id_md *xdp_id = cookie; + struct ifinfomsg *ifinfo = msg; + int ret; + + if (xdp_id->ifindex && xdp_id->ifindex != ifinfo->ifi_index) + return 0; + + if (!tb[IFLA_XDP]) + return 0; + + ret = libbpf_nla_parse_nested(xdp_tb, IFLA_XDP_MAX, tb[IFLA_XDP], NULL); + if (ret) + return ret; + + if (!xdp_tb[IFLA_XDP_ATTACHED]) + return 0; + + xdp_id->info.attach_mode = libbpf_nla_getattr_u8( + xdp_tb[IFLA_XDP_ATTACHED]); + + if (xdp_id->info.attach_mode == XDP_ATTACHED_NONE) + return 0; + + if (xdp_tb[IFLA_XDP_PROG_ID]) + xdp_id->info.prog_id = libbpf_nla_getattr_u32( + xdp_tb[IFLA_XDP_PROG_ID]); + + if (xdp_tb[IFLA_XDP_SKB_PROG_ID]) + xdp_id->info.skb_prog_id = libbpf_nla_getattr_u32( + xdp_tb[IFLA_XDP_SKB_PROG_ID]); + + if (xdp_tb[IFLA_XDP_DRV_PROG_ID]) + xdp_id->info.drv_prog_id = libbpf_nla_getattr_u32( + xdp_tb[IFLA_XDP_DRV_PROG_ID]); + + if (xdp_tb[IFLA_XDP_HW_PROG_ID]) + xdp_id->info.hw_prog_id = libbpf_nla_getattr_u32( + xdp_tb[IFLA_XDP_HW_PROG_ID]); + + return 0; +} + +int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, + size_t info_size, __u32 flags) +{ + struct xdp_id_md xdp_id = {}; + int sock, ret; + __u32 nl_pid; + __u32 mask; + + if (flags & ~XDP_FLAGS_MASK || !info_size) + return -EINVAL; + + /* Check whether the single {HW,DRV,SKB} mode is set */ + flags &= (XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE); + mask = flags - 1; + if (flags && flags & mask) + return -EINVAL; + + sock = libbpf_netlink_open(&nl_pid); + if (sock < 0) + return sock; + + xdp_id.ifindex = ifindex; + xdp_id.flags = flags; + + ret = libbpf_nl_get_link(sock, nl_pid, get_xdp_info, &xdp_id); + if (!ret) { + size_t sz = min(info_size, sizeof(xdp_id.info)); + + memcpy(info, &xdp_id.info, sz); + memset((void *) info + sz, 0, info_size - sz); + } + + close(sock); + return ret; +} + +static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags) +{ + if (info->attach_mode != XDP_ATTACHED_MULTI) + return info->prog_id; + if (flags & XDP_FLAGS_DRV_MODE) + return info->drv_prog_id; + if (flags & XDP_FLAGS_HW_MODE) + return info->hw_prog_id; + if (flags & XDP_FLAGS_SKB_MODE) + return info->skb_prog_id; + + return 0; +} + +int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags) +{ + struct xdp_link_info info; + int ret; + + ret = bpf_get_link_xdp_info(ifindex, &info, sizeof(info), flags); + if (!ret) + *prog_id = get_xdp_id(&info, flags); + + return ret; +} + +int libbpf_nl_get_link(int sock, unsigned int nl_pid, + libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie) +{ + struct { + struct nlmsghdr nlh; + struct ifinfomsg ifm; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), + .nlh.nlmsg_type = RTM_GETLINK, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .ifm.ifi_family = AF_PACKET, + }; + int seq = time(NULL); + + req.nlh.nlmsg_seq = seq; + if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) + return -errno; + + return bpf_netlink_recv(sock, nl_pid, seq, __dump_link_nlmsg, + dump_link_nlmsg, cookie); +} + +static int __dump_class_nlmsg(struct nlmsghdr *nlh, + libbpf_dump_nlmsg_t dump_class_nlmsg, + void *cookie) +{ + struct nlattr *tb[TCA_MAX + 1], *attr; + struct tcmsg *t = NLMSG_DATA(nlh); + int len; + + len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t)); + attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t))); + if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0) + return -LIBBPF_ERRNO__NLPARSE; + + return dump_class_nlmsg(cookie, t, tb); +} + +int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex, + libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie) +{ + struct { + struct nlmsghdr nlh; + struct tcmsg t; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), + .nlh.nlmsg_type = RTM_GETTCLASS, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .t.tcm_family = AF_UNSPEC, + .t.tcm_ifindex = ifindex, + }; + int seq = time(NULL); + + req.nlh.nlmsg_seq = seq; + if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) + return -errno; + + return bpf_netlink_recv(sock, nl_pid, seq, __dump_class_nlmsg, + dump_class_nlmsg, cookie); +} + +static int __dump_qdisc_nlmsg(struct nlmsghdr *nlh, + libbpf_dump_nlmsg_t dump_qdisc_nlmsg, + void *cookie) +{ + struct nlattr *tb[TCA_MAX + 1], *attr; + struct tcmsg *t = NLMSG_DATA(nlh); + int len; + + len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t)); + attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t))); + if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0) + return -LIBBPF_ERRNO__NLPARSE; + + return dump_qdisc_nlmsg(cookie, t, tb); +} + +int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex, + libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie) +{ + struct { + struct nlmsghdr nlh; + struct tcmsg t; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), + .nlh.nlmsg_type = RTM_GETQDISC, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .t.tcm_family = AF_UNSPEC, + .t.tcm_ifindex = ifindex, + }; + int seq = time(NULL); + + req.nlh.nlmsg_seq = seq; + if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) + return -errno; + + return bpf_netlink_recv(sock, nl_pid, seq, __dump_qdisc_nlmsg, + dump_qdisc_nlmsg, cookie); +} + +static int __dump_filter_nlmsg(struct nlmsghdr *nlh, + libbpf_dump_nlmsg_t dump_filter_nlmsg, + void *cookie) +{ + struct nlattr *tb[TCA_MAX + 1], *attr; + struct tcmsg *t = NLMSG_DATA(nlh); + int len; + + len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t)); + attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t))); + if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0) + return -LIBBPF_ERRNO__NLPARSE; + + return dump_filter_nlmsg(cookie, t, tb); +} + +int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle, + libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie) +{ + struct { + struct nlmsghdr nlh; + struct tcmsg t; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), + .nlh.nlmsg_type = RTM_GETTFILTER, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .t.tcm_family = AF_UNSPEC, + .t.tcm_ifindex = ifindex, + .t.tcm_parent = handle, + }; + int seq = time(NULL); + + req.nlh.nlmsg_seq = seq; + if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) + return -errno; + + return bpf_netlink_recv(sock, nl_pid, seq, __dump_filter_nlmsg, + dump_filter_nlmsg, cookie); +} diff --git a/src/contrib/libbpf/bpf/nlattr.c b/src/contrib/libbpf/bpf/nlattr.c new file mode 100644 index 0000000..8db44bb --- /dev/null +++ b/src/contrib/libbpf/bpf/nlattr.c @@ -0,0 +1,195 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +/* + * NETLINK Netlink attributes + * + * Copyright (c) 2003-2013 Thomas Graf <tgraf@suug.ch> + */ + +#include <errno.h> +#include "nlattr.h" +#include "libbpf_internal.h" +#include <linux/rtnetlink.h> +#include <string.h> +#include <stdio.h> + +static uint16_t nla_attr_minlen[LIBBPF_NLA_TYPE_MAX+1] = { + [LIBBPF_NLA_U8] = sizeof(uint8_t), + [LIBBPF_NLA_U16] = sizeof(uint16_t), + [LIBBPF_NLA_U32] = sizeof(uint32_t), + [LIBBPF_NLA_U64] = sizeof(uint64_t), + [LIBBPF_NLA_STRING] = 1, + [LIBBPF_NLA_FLAG] = 0, +}; + +static struct nlattr *nla_next(const struct nlattr *nla, int *remaining) +{ + int totlen = NLA_ALIGN(nla->nla_len); + + *remaining -= totlen; + return (struct nlattr *) ((char *) nla + totlen); +} + +static int nla_ok(const struct nlattr *nla, int remaining) +{ + return remaining >= sizeof(*nla) && + nla->nla_len >= sizeof(*nla) && + nla->nla_len <= remaining; +} + +static int nla_type(const struct nlattr *nla) +{ + return nla->nla_type & NLA_TYPE_MASK; +} + +static int validate_nla(struct nlattr *nla, int maxtype, + struct libbpf_nla_policy *policy) +{ + struct libbpf_nla_policy *pt; + unsigned int minlen = 0; + int type = nla_type(nla); + + if (type < 0 || type > maxtype) + return 0; + + pt = &policy[type]; + + if (pt->type > LIBBPF_NLA_TYPE_MAX) + return 0; + + if (pt->minlen) + minlen = pt->minlen; + else if (pt->type != LIBBPF_NLA_UNSPEC) + minlen = nla_attr_minlen[pt->type]; + + if (libbpf_nla_len(nla) < minlen) + return -1; + + if (pt->maxlen && libbpf_nla_len(nla) > pt->maxlen) + return -1; + + if (pt->type == LIBBPF_NLA_STRING) { + char *data = libbpf_nla_data(nla); + + if (data[libbpf_nla_len(nla) - 1] != '\0') + return -1; + } + + return 0; +} + +static inline int nlmsg_len(const struct nlmsghdr *nlh) +{ + return nlh->nlmsg_len - NLMSG_HDRLEN; +} + +/** + * Create attribute index based on a stream of attributes. + * @arg tb Index array to be filled (maxtype+1 elements). + * @arg maxtype Maximum attribute type expected and accepted. + * @arg head Head of attribute stream. + * @arg len Length of attribute stream. + * @arg policy Attribute validation policy. + * + * Iterates over the stream of attributes and stores a pointer to each + * attribute in the index array using the attribute type as index to + * the array. Attribute with a type greater than the maximum type + * specified will be silently ignored in order to maintain backwards + * compatibility. If \a policy is not NULL, the attribute will be + * validated using the specified policy. + * + * @see nla_validate + * @return 0 on success or a negative error code. + */ +int libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, + int len, struct libbpf_nla_policy *policy) +{ + struct nlattr *nla; + int rem, err; + + memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); + + libbpf_nla_for_each_attr(nla, head, len, rem) { + int type = nla_type(nla); + + if (type > maxtype) + continue; + + if (policy) { + err = validate_nla(nla, maxtype, policy); + if (err < 0) + goto errout; + } + + if (tb[type]) + pr_warn("Attribute of type %#x found multiple times in message, " + "previous attribute is being ignored.\n", type); + + tb[type] = nla; + } + + err = 0; +errout: + return err; +} + +/** + * Create attribute index based on nested attribute + * @arg tb Index array to be filled (maxtype+1 elements). + * @arg maxtype Maximum attribute type expected and accepted. + * @arg nla Nested Attribute. + * @arg policy Attribute validation policy. + * + * Feeds the stream of attributes nested into the specified attribute + * to libbpf_nla_parse(). + * + * @see libbpf_nla_parse + * @return 0 on success or a negative error code. + */ +int libbpf_nla_parse_nested(struct nlattr *tb[], int maxtype, + struct nlattr *nla, + struct libbpf_nla_policy *policy) +{ + return libbpf_nla_parse(tb, maxtype, libbpf_nla_data(nla), + libbpf_nla_len(nla), policy); +} + +/* dump netlink extended ack error message */ +int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh) +{ + struct libbpf_nla_policy extack_policy[NLMSGERR_ATTR_MAX + 1] = { + [NLMSGERR_ATTR_MSG] = { .type = LIBBPF_NLA_STRING }, + [NLMSGERR_ATTR_OFFS] = { .type = LIBBPF_NLA_U32 }, + }; + struct nlattr *tb[NLMSGERR_ATTR_MAX + 1], *attr; + struct nlmsgerr *err; + char *errmsg = NULL; + int hlen, alen; + + /* no TLVs, nothing to do here */ + if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS)) + return 0; + + err = (struct nlmsgerr *)NLMSG_DATA(nlh); + hlen = sizeof(*err); + + /* if NLM_F_CAPPED is set then the inner err msg was capped */ + if (!(nlh->nlmsg_flags & NLM_F_CAPPED)) + hlen += nlmsg_len(&err->msg); + + attr = (struct nlattr *) ((void *) err + hlen); + alen = nlh->nlmsg_len - hlen; + + if (libbpf_nla_parse(tb, NLMSGERR_ATTR_MAX, attr, alen, + extack_policy) != 0) { + pr_warn("Failed to parse extended error attributes\n"); + return 0; + } + + if (tb[NLMSGERR_ATTR_MSG]) + errmsg = (char *) libbpf_nla_data(tb[NLMSGERR_ATTR_MSG]); + + pr_warn("Kernel error message: %s\n", errmsg); + + return 0; +} diff --git a/src/contrib/libbpf/bpf/nlattr.h b/src/contrib/libbpf/bpf/nlattr.h new file mode 100644 index 0000000..6cc3ac9 --- /dev/null +++ b/src/contrib/libbpf/bpf/nlattr.h @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +/* + * NETLINK Netlink attributes + * + * Copyright (c) 2003-2013 Thomas Graf <tgraf@suug.ch> + */ + +#ifndef __LIBBPF_NLATTR_H +#define __LIBBPF_NLATTR_H + +#include <stdint.h> +#include <linux/netlink.h> +/* avoid multiple definition of netlink features */ +#define __LINUX_NETLINK_H + +/** + * Standard attribute types to specify validation policy + */ +enum { + LIBBPF_NLA_UNSPEC, /**< Unspecified type, binary data chunk */ + LIBBPF_NLA_U8, /**< 8 bit integer */ + LIBBPF_NLA_U16, /**< 16 bit integer */ + LIBBPF_NLA_U32, /**< 32 bit integer */ + LIBBPF_NLA_U64, /**< 64 bit integer */ + LIBBPF_NLA_STRING, /**< NUL terminated character string */ + LIBBPF_NLA_FLAG, /**< Flag */ + LIBBPF_NLA_MSECS, /**< Micro seconds (64bit) */ + LIBBPF_NLA_NESTED, /**< Nested attributes */ + __LIBBPF_NLA_TYPE_MAX, +}; + +#define LIBBPF_NLA_TYPE_MAX (__LIBBPF_NLA_TYPE_MAX - 1) + +/** + * @ingroup attr + * Attribute validation policy. + * + * See section @core_doc{core_attr_parse,Attribute Parsing} for more details. + */ +struct libbpf_nla_policy { + /** Type of attribute or LIBBPF_NLA_UNSPEC */ + uint16_t type; + + /** Minimal length of payload required */ + uint16_t minlen; + + /** Maximal length of payload allowed */ + uint16_t maxlen; +}; + +/** + * @ingroup attr + * Iterate over a stream of attributes + * @arg pos loop counter, set to current attribute + * @arg head head of attribute stream + * @arg len length of attribute stream + * @arg rem initialized to len, holds bytes currently remaining in stream + */ +#define libbpf_nla_for_each_attr(pos, head, len, rem) \ + for (pos = head, rem = len; \ + nla_ok(pos, rem); \ + pos = nla_next(pos, &(rem))) + +/** + * libbpf_nla_data - head of payload + * @nla: netlink attribute + */ +static inline void *libbpf_nla_data(const struct nlattr *nla) +{ + return (char *) nla + NLA_HDRLEN; +} + +static inline uint8_t libbpf_nla_getattr_u8(const struct nlattr *nla) +{ + return *(uint8_t *)libbpf_nla_data(nla); +} + +static inline uint32_t libbpf_nla_getattr_u32(const struct nlattr *nla) +{ + return *(uint32_t *)libbpf_nla_data(nla); +} + +static inline const char *libbpf_nla_getattr_str(const struct nlattr *nla) +{ + return (const char *)libbpf_nla_data(nla); +} + +/** + * libbpf_nla_len - length of payload + * @nla: netlink attribute + */ +static inline int libbpf_nla_len(const struct nlattr *nla) +{ + return nla->nla_len - NLA_HDRLEN; +} + +int libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, + int len, struct libbpf_nla_policy *policy); +int libbpf_nla_parse_nested(struct nlattr *tb[], int maxtype, + struct nlattr *nla, + struct libbpf_nla_policy *policy); + +int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh); + +#endif /* __LIBBPF_NLATTR_H */ diff --git a/src/contrib/libbpf/bpf/str_error.c b/src/contrib/libbpf/bpf/str_error.c new file mode 100644 index 0000000..b8064ee --- /dev/null +++ b/src/contrib/libbpf/bpf/str_error.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +#undef _GNU_SOURCE +#include <string.h> +#include <stdio.h> +#include "str_error.h" + +/* + * Wrapper to allow for building in non-GNU systems such as Alpine Linux's musl + * libc, while checking strerror_r() return to avoid having to check this in + * all places calling it. + */ +char *libbpf_strerror_r(int err, char *dst, int len) +{ + int ret = strerror_r(err < 0 ? -err : err, dst, len); + if (ret) + snprintf(dst, len, "ERROR: strerror_r(%d)=%d", err, ret); + return dst; +} diff --git a/src/contrib/libbpf/bpf/str_error.h b/src/contrib/libbpf/bpf/str_error.h new file mode 100644 index 0000000..a139334 --- /dev/null +++ b/src/contrib/libbpf/bpf/str_error.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __LIBBPF_STR_ERROR_H +#define __LIBBPF_STR_ERROR_H + +char *libbpf_strerror_r(int err, char *dst, int len); +#endif /* __LIBBPF_STR_ERROR_H */ diff --git a/src/contrib/libbpf/bpf/xsk.c b/src/contrib/libbpf/bpf/xsk.c new file mode 100644 index 0000000..8e0ffa8 --- /dev/null +++ b/src/contrib/libbpf/bpf/xsk.c @@ -0,0 +1,797 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +/* + * AF_XDP user-space access library. + * + * Copyright(c) 2018 - 2019 Intel Corporation. + * + * Author(s): Magnus Karlsson <magnus.karlsson@intel.com> + */ + +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <arpa/inet.h> +#include <asm/barrier.h> +#include <linux/compiler.h> +#include <linux/ethtool.h> +#include <linux/filter.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/if_xdp.h> +#include <linux/sockios.h> +#include <net/if.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/types.h> + +#include "bpf.h" +#include "libbpf.h" +#include "libbpf_internal.h" +#include "xsk.h" + +#ifndef SOL_XDP + #define SOL_XDP 283 +#endif + +#ifndef AF_XDP + #define AF_XDP 44 +#endif + +#ifndef PF_XDP + #define PF_XDP AF_XDP +#endif + +struct xsk_umem { + struct xsk_ring_prod *fill; + struct xsk_ring_cons *comp; + char *umem_area; + struct xsk_umem_config config; + int fd; + int refcount; +}; + +struct xsk_socket { + struct xsk_ring_cons *rx; + struct xsk_ring_prod *tx; + __u64 outstanding_tx; + struct xsk_umem *umem; + struct xsk_socket_config config; + int fd; + int ifindex; + int prog_fd; + int xsks_map_fd; + __u32 queue_id; + char ifname[IFNAMSIZ]; +}; + +struct xsk_nl_info { + bool xdp_prog_attached; + int ifindex; + int fd; +}; + +/* Up until and including Linux 5.3 */ +struct xdp_ring_offset_v1 { + __u64 producer; + __u64 consumer; + __u64 desc; +}; + +/* Up until and including Linux 5.3 */ +struct xdp_mmap_offsets_v1 { + struct xdp_ring_offset_v1 rx; + struct xdp_ring_offset_v1 tx; + struct xdp_ring_offset_v1 fr; + struct xdp_ring_offset_v1 cr; +}; + +int xsk_umem__fd(const struct xsk_umem *umem) +{ + return umem ? umem->fd : -EINVAL; +} + +int xsk_socket__fd(const struct xsk_socket *xsk) +{ + return xsk ? xsk->fd : -EINVAL; +} + +static bool xsk_page_aligned(void *buffer) +{ + unsigned long addr = (unsigned long)buffer; + + return !(addr & (getpagesize() - 1)); +} + +static void xsk_set_umem_config(struct xsk_umem_config *cfg, + const struct xsk_umem_config *usr_cfg) +{ + if (!usr_cfg) { + cfg->fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; + cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; + cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; + cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM; + cfg->flags = XSK_UMEM__DEFAULT_FLAGS; + return; + } + + cfg->fill_size = usr_cfg->fill_size; + cfg->comp_size = usr_cfg->comp_size; + cfg->frame_size = usr_cfg->frame_size; + cfg->frame_headroom = usr_cfg->frame_headroom; + cfg->flags = usr_cfg->flags; +} + +static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg, + const struct xsk_socket_config *usr_cfg) +{ + if (!usr_cfg) { + cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; + cfg->tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; + cfg->libbpf_flags = 0; + cfg->xdp_flags = 0; + cfg->bind_flags = 0; + return 0; + } + + if (usr_cfg->libbpf_flags & ~XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD) + return -EINVAL; + + cfg->rx_size = usr_cfg->rx_size; + cfg->tx_size = usr_cfg->tx_size; + cfg->libbpf_flags = usr_cfg->libbpf_flags; + cfg->xdp_flags = usr_cfg->xdp_flags; + cfg->bind_flags = usr_cfg->bind_flags; + + return 0; +} + +static void xsk_mmap_offsets_v1(struct xdp_mmap_offsets *off) +{ + struct xdp_mmap_offsets_v1 off_v1; + + /* getsockopt on a kernel <= 5.3 has no flags fields. + * Copy over the offsets to the correct places in the >=5.4 format + * and put the flags where they would have been on that kernel. + */ + memcpy(&off_v1, off, sizeof(off_v1)); + + off->rx.producer = off_v1.rx.producer; + off->rx.consumer = off_v1.rx.consumer; + off->rx.desc = off_v1.rx.desc; + off->rx.flags = off_v1.rx.consumer + sizeof(__u32); + + off->tx.producer = off_v1.tx.producer; + off->tx.consumer = off_v1.tx.consumer; + off->tx.desc = off_v1.tx.desc; + off->tx.flags = off_v1.tx.consumer + sizeof(__u32); + + off->fr.producer = off_v1.fr.producer; + off->fr.consumer = off_v1.fr.consumer; + off->fr.desc = off_v1.fr.desc; + off->fr.flags = off_v1.fr.consumer + sizeof(__u32); + + off->cr.producer = off_v1.cr.producer; + off->cr.consumer = off_v1.cr.consumer; + off->cr.desc = off_v1.cr.desc; + off->cr.flags = off_v1.cr.consumer + sizeof(__u32); +} + +static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off) +{ + socklen_t optlen; + int err; + + optlen = sizeof(*off); + err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen); + if (err) + return err; + + if (optlen == sizeof(*off)) + return 0; + + if (optlen == sizeof(struct xdp_mmap_offsets_v1)) { + xsk_mmap_offsets_v1(off); + return 0; + } + + return -EINVAL; +} + +int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area, + __u64 size, struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_umem_config *usr_config) +{ + struct xdp_mmap_offsets off; + struct xdp_umem_reg mr; + struct xsk_umem *umem; + void *map; + int err; + + if (!umem_area || !umem_ptr || !fill || !comp) + return -EFAULT; + if (!size && !xsk_page_aligned(umem_area)) + return -EINVAL; + + umem = calloc(1, sizeof(*umem)); + if (!umem) + return -ENOMEM; + + umem->fd = socket(AF_XDP, SOCK_RAW, 0); + if (umem->fd < 0) { + err = -errno; + goto out_umem_alloc; + } + + umem->umem_area = umem_area; + xsk_set_umem_config(&umem->config, usr_config); + + memset(&mr, 0, sizeof(mr)); + mr.addr = (uintptr_t)umem_area; + mr.len = size; + mr.chunk_size = umem->config.frame_size; + mr.headroom = umem->config.frame_headroom; + mr.flags = umem->config.flags; + + err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)); + if (err) { + err = -errno; + goto out_socket; + } + err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_FILL_RING, + &umem->config.fill_size, + sizeof(umem->config.fill_size)); + if (err) { + err = -errno; + goto out_socket; + } + err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, + &umem->config.comp_size, + sizeof(umem->config.comp_size)); + if (err) { + err = -errno; + goto out_socket; + } + + err = xsk_get_mmap_offsets(umem->fd, &off); + if (err) { + err = -errno; + goto out_socket; + } + + map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd, + XDP_UMEM_PGOFF_FILL_RING); + if (map == MAP_FAILED) { + err = -errno; + goto out_socket; + } + + umem->fill = fill; + fill->mask = umem->config.fill_size - 1; + fill->size = umem->config.fill_size; + fill->producer = map + off.fr.producer; + fill->consumer = map + off.fr.consumer; + fill->flags = map + off.fr.flags; + fill->ring = map + off.fr.desc; + fill->cached_cons = umem->config.fill_size; + + map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd, + XDP_UMEM_PGOFF_COMPLETION_RING); + if (map == MAP_FAILED) { + err = -errno; + goto out_mmap; + } + + umem->comp = comp; + comp->mask = umem->config.comp_size - 1; + comp->size = umem->config.comp_size; + comp->producer = map + off.cr.producer; + comp->consumer = map + off.cr.consumer; + comp->flags = map + off.cr.flags; + comp->ring = map + off.cr.desc; + + *umem_ptr = umem; + return 0; + +out_mmap: + munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64)); +out_socket: + close(umem->fd); +out_umem_alloc: + free(umem); + return err; +} + +struct xsk_umem_config_v1 { + __u32 fill_size; + __u32 comp_size; + __u32 frame_size; + __u32 frame_headroom; +}; + +int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area, + __u64 size, struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_umem_config *usr_config) +{ + struct xsk_umem_config config; + + memcpy(&config, usr_config, sizeof(struct xsk_umem_config_v1)); + config.flags = 0; + + return xsk_umem__create_v0_0_4(umem_ptr, umem_area, size, fill, comp, + &config); +} +COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2) +DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4) + +static int xsk_load_xdp_prog(struct xsk_socket *xsk) +{ + static const int log_buf_size = 16 * 1024; + char log_buf[log_buf_size]; + int err, prog_fd; + + /* This is the C-program: + * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) + * { + * int ret, index = ctx->rx_queue_index; + * + * // A set entry here means that the correspnding queue_id + * // has an active AF_XDP socket bound to it. + * ret = bpf_redirect_map(&xsks_map, index, XDP_PASS); + * if (ret > 0) + * return ret; + * + * // Fallback for pre-5.3 kernels, not supporting default + * // action in the flags parameter. + * if (bpf_map_lookup_elem(&xsks_map, &index)) + * return bpf_redirect_map(&xsks_map, index, 0); + * return XDP_PASS; + * } + */ + struct bpf_insn prog[] = { + /* r2 = *(u32 *)(r1 + 16) */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16), + /* *(u32 *)(r10 - 4) = r2 */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4), + /* r1 = xskmap[] */ + BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), + /* r3 = XDP_PASS */ + BPF_MOV64_IMM(BPF_REG_3, 2), + /* call bpf_redirect_map */ + BPF_EMIT_CALL(BPF_FUNC_redirect_map), + /* if w0 != 0 goto pc+13 */ + BPF_JMP32_IMM(BPF_JSGT, BPF_REG_0, 0, 13), + /* r2 = r10 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + /* r2 += -4 */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + /* r1 = xskmap[] */ + BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), + /* call bpf_map_lookup_elem */ + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + /* r1 = r0 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + /* r0 = XDP_PASS */ + BPF_MOV64_IMM(BPF_REG_0, 2), + /* if r1 == 0 goto pc+5 */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5), + /* r2 = *(u32 *)(r10 - 4) */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4), + /* r1 = xskmap[] */ + BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), + /* r3 = 0 */ + BPF_MOV64_IMM(BPF_REG_3, 0), + /* call bpf_redirect_map */ + BPF_EMIT_CALL(BPF_FUNC_redirect_map), + /* The jumps are to this instruction */ + BPF_EXIT_INSN(), + }; + size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); + + prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, prog, insns_cnt, + "LGPL-2.1 or BSD-2-Clause", 0, log_buf, + log_buf_size); + if (prog_fd < 0) { + pr_warn("BPF log buffer:\n%s", log_buf); + return prog_fd; + } + + err = bpf_set_link_xdp_fd(xsk->ifindex, prog_fd, xsk->config.xdp_flags); + if (err) { + close(prog_fd); + return err; + } + + xsk->prog_fd = prog_fd; + return 0; +} + +static int xsk_get_max_queues(struct xsk_socket *xsk) +{ + struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS }; + struct ifreq ifr = {}; + int fd, err, ret; + + fd = socket(AF_INET, SOCK_DGRAM, 0); + if (fd < 0) + return -errno; + + ifr.ifr_data = (void *)&channels; + memcpy(ifr.ifr_name, xsk->ifname, IFNAMSIZ - 1); + ifr.ifr_name[IFNAMSIZ - 1] = '\0'; + err = ioctl(fd, SIOCETHTOOL, &ifr); + if (err && errno != EOPNOTSUPP) { + ret = -errno; + goto out; + } + + if (err) { + /* If the device says it has no channels, then all traffic + * is sent to a single stream, so max queues = 1. + */ + ret = 1; + } else { + /* Take the max of rx, tx, combined. Drivers return + * the number of channels in different ways. + */ + ret = max(channels.max_rx, channels.max_tx); + ret = max(ret, (int)channels.max_combined); + } + +out: + close(fd); + return ret; +} + +static int xsk_create_bpf_maps(struct xsk_socket *xsk) +{ + int max_queues; + int fd; + + max_queues = xsk_get_max_queues(xsk); + if (max_queues < 0) + return max_queues; + + fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map", + sizeof(int), sizeof(int), max_queues, 0); + if (fd < 0) + return fd; + + xsk->xsks_map_fd = fd; + + return 0; +} + +static void xsk_delete_bpf_maps(struct xsk_socket *xsk) +{ + bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id); + close(xsk->xsks_map_fd); +} + +static int xsk_lookup_bpf_maps(struct xsk_socket *xsk) +{ + __u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info); + __u32 map_len = sizeof(struct bpf_map_info); + struct bpf_prog_info prog_info = {}; + struct bpf_map_info map_info; + int fd, err; + + err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len); + if (err) + return err; + + num_maps = prog_info.nr_map_ids; + + map_ids = calloc(prog_info.nr_map_ids, sizeof(*map_ids)); + if (!map_ids) + return -ENOMEM; + + memset(&prog_info, 0, prog_len); + prog_info.nr_map_ids = num_maps; + prog_info.map_ids = (__u64)(unsigned long)map_ids; + + err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len); + if (err) + goto out_map_ids; + + xsk->xsks_map_fd = -1; + + for (i = 0; i < prog_info.nr_map_ids; i++) { + fd = bpf_map_get_fd_by_id(map_ids[i]); + if (fd < 0) + continue; + + err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len); + if (err) { + close(fd); + continue; + } + + if (!strcmp(map_info.name, "xsks_map")) { + xsk->xsks_map_fd = fd; + continue; + } + + close(fd); + } + + err = 0; + if (xsk->xsks_map_fd == -1) + err = -ENOENT; + +out_map_ids: + free(map_ids); + return err; +} + +static int xsk_set_bpf_maps(struct xsk_socket *xsk) +{ + return bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id, + &xsk->fd, 0); +} + +static int xsk_setup_xdp_prog(struct xsk_socket *xsk) +{ + __u32 prog_id = 0; + int err; + + err = bpf_get_link_xdp_id(xsk->ifindex, &prog_id, + xsk->config.xdp_flags); + if (err) + return err; + + if (!prog_id) { + err = xsk_create_bpf_maps(xsk); + if (err) + return err; + + err = xsk_load_xdp_prog(xsk); + if (err) { + xsk_delete_bpf_maps(xsk); + return err; + } + } else { + xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id); + if (xsk->prog_fd < 0) + return -errno; + err = xsk_lookup_bpf_maps(xsk); + if (err) { + close(xsk->prog_fd); + return err; + } + } + + if (xsk->rx) + err = xsk_set_bpf_maps(xsk); + if (err) { + xsk_delete_bpf_maps(xsk); + close(xsk->prog_fd); + return err; + } + + return 0; +} + +int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, + __u32 queue_id, struct xsk_umem *umem, + struct xsk_ring_cons *rx, struct xsk_ring_prod *tx, + const struct xsk_socket_config *usr_config) +{ + void *rx_map = NULL, *tx_map = NULL; + struct sockaddr_xdp sxdp = {}; + struct xdp_mmap_offsets off; + struct xsk_socket *xsk; + int err; + + if (!umem || !xsk_ptr || !(rx || tx)) + return -EFAULT; + + xsk = calloc(1, sizeof(*xsk)); + if (!xsk) + return -ENOMEM; + + err = xsk_set_xdp_socket_config(&xsk->config, usr_config); + if (err) + goto out_xsk_alloc; + + if (umem->refcount && + !(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { + pr_warn("Error: shared umems not supported by libbpf supplied XDP program.\n"); + err = -EBUSY; + goto out_xsk_alloc; + } + + if (umem->refcount++ > 0) { + xsk->fd = socket(AF_XDP, SOCK_RAW, 0); + if (xsk->fd < 0) { + err = -errno; + goto out_xsk_alloc; + } + } else { + xsk->fd = umem->fd; + } + + xsk->outstanding_tx = 0; + xsk->queue_id = queue_id; + xsk->umem = umem; + xsk->ifindex = if_nametoindex(ifname); + if (!xsk->ifindex) { + err = -errno; + goto out_socket; + } + memcpy(xsk->ifname, ifname, IFNAMSIZ - 1); + xsk->ifname[IFNAMSIZ - 1] = '\0'; + + if (rx) { + err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING, + &xsk->config.rx_size, + sizeof(xsk->config.rx_size)); + if (err) { + err = -errno; + goto out_socket; + } + } + if (tx) { + err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING, + &xsk->config.tx_size, + sizeof(xsk->config.tx_size)); + if (err) { + err = -errno; + goto out_socket; + } + } + + err = xsk_get_mmap_offsets(xsk->fd, &off); + if (err) { + err = -errno; + goto out_socket; + } + + if (rx) { + rx_map = mmap(NULL, off.rx.desc + + xsk->config.rx_size * sizeof(struct xdp_desc), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, + xsk->fd, XDP_PGOFF_RX_RING); + if (rx_map == MAP_FAILED) { + err = -errno; + goto out_socket; + } + + rx->mask = xsk->config.rx_size - 1; + rx->size = xsk->config.rx_size; + rx->producer = rx_map + off.rx.producer; + rx->consumer = rx_map + off.rx.consumer; + rx->flags = rx_map + off.rx.flags; + rx->ring = rx_map + off.rx.desc; + } + xsk->rx = rx; + + if (tx) { + tx_map = mmap(NULL, off.tx.desc + + xsk->config.tx_size * sizeof(struct xdp_desc), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, + xsk->fd, XDP_PGOFF_TX_RING); + if (tx_map == MAP_FAILED) { + err = -errno; + goto out_mmap_rx; + } + + tx->mask = xsk->config.tx_size - 1; + tx->size = xsk->config.tx_size; + tx->producer = tx_map + off.tx.producer; + tx->consumer = tx_map + off.tx.consumer; + tx->flags = tx_map + off.tx.flags; + tx->ring = tx_map + off.tx.desc; + tx->cached_cons = xsk->config.tx_size; + } + xsk->tx = tx; + + sxdp.sxdp_family = PF_XDP; + sxdp.sxdp_ifindex = xsk->ifindex; + sxdp.sxdp_queue_id = xsk->queue_id; + if (umem->refcount > 1) { + sxdp.sxdp_flags = XDP_SHARED_UMEM; + sxdp.sxdp_shared_umem_fd = umem->fd; + } else { + sxdp.sxdp_flags = xsk->config.bind_flags; + } + + err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp)); + if (err) { + err = -errno; + goto out_mmap_tx; + } + + xsk->prog_fd = -1; + + if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { + err = xsk_setup_xdp_prog(xsk); + if (err) + goto out_mmap_tx; + } + + *xsk_ptr = xsk; + return 0; + +out_mmap_tx: + if (tx) + munmap(tx_map, off.tx.desc + + xsk->config.tx_size * sizeof(struct xdp_desc)); +out_mmap_rx: + if (rx) + munmap(rx_map, off.rx.desc + + xsk->config.rx_size * sizeof(struct xdp_desc)); +out_socket: + if (--umem->refcount) + close(xsk->fd); +out_xsk_alloc: + free(xsk); + return err; +} + +int xsk_umem__delete(struct xsk_umem *umem) +{ + struct xdp_mmap_offsets off; + int err; + + if (!umem) + return 0; + + if (umem->refcount) + return -EBUSY; + + err = xsk_get_mmap_offsets(umem->fd, &off); + if (!err) { + munmap(umem->fill->ring - off.fr.desc, + off.fr.desc + umem->config.fill_size * sizeof(__u64)); + munmap(umem->comp->ring - off.cr.desc, + off.cr.desc + umem->config.comp_size * sizeof(__u64)); + } + + close(umem->fd); + free(umem); + + return 0; +} + +void xsk_socket__delete(struct xsk_socket *xsk) +{ + size_t desc_sz = sizeof(struct xdp_desc); + struct xdp_mmap_offsets off; + int err; + + if (!xsk) + return; + + if (xsk->prog_fd != -1) { + xsk_delete_bpf_maps(xsk); + close(xsk->prog_fd); + } + + err = xsk_get_mmap_offsets(xsk->fd, &off); + if (!err) { + if (xsk->rx) { + munmap(xsk->rx->ring - off.rx.desc, + off.rx.desc + xsk->config.rx_size * desc_sz); + } + if (xsk->tx) { + munmap(xsk->tx->ring - off.tx.desc, + off.tx.desc + xsk->config.tx_size * desc_sz); + } + + } + + xsk->umem->refcount--; + /* Do not close an fd that also has an associated umem connected + * to it. + */ + if (xsk->fd != xsk->umem->fd) + close(xsk->fd); + free(xsk); +} diff --git a/src/contrib/libbpf/bpf/xsk.h b/src/contrib/libbpf/bpf/xsk.h new file mode 100644 index 0000000..584f682 --- /dev/null +++ b/src/contrib/libbpf/bpf/xsk.h @@ -0,0 +1,246 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +/* + * AF_XDP user-space access library. + * + * Copyright(c) 2018 - 2019 Intel Corporation. + * + * Author(s): Magnus Karlsson <magnus.karlsson@intel.com> + */ + +#ifndef __LIBBPF_XSK_H +#define __LIBBPF_XSK_H + +#include <stdio.h> +#include <stdint.h> +#include <linux/if_xdp.h> + +#include "libbpf.h" +#include "libbpf_util.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Do not access these members directly. Use the functions below. */ +#define DEFINE_XSK_RING(name) \ +struct name { \ + __u32 cached_prod; \ + __u32 cached_cons; \ + __u32 mask; \ + __u32 size; \ + __u32 *producer; \ + __u32 *consumer; \ + void *ring; \ + __u32 *flags; \ +} + +DEFINE_XSK_RING(xsk_ring_prod); +DEFINE_XSK_RING(xsk_ring_cons); + +/* For a detailed explanation on the memory barriers associated with the + * ring, please take a look at net/xdp/xsk_queue.h. + */ + +struct xsk_umem; +struct xsk_socket; + +static inline __u64 *xsk_ring_prod__fill_addr(struct xsk_ring_prod *fill, + __u32 idx) +{ + __u64 *addrs = (__u64 *)fill->ring; + + return &addrs[idx & fill->mask]; +} + +static inline const __u64 * +xsk_ring_cons__comp_addr(const struct xsk_ring_cons *comp, __u32 idx) +{ + const __u64 *addrs = (const __u64 *)comp->ring; + + return &addrs[idx & comp->mask]; +} + +static inline struct xdp_desc *xsk_ring_prod__tx_desc(struct xsk_ring_prod *tx, + __u32 idx) +{ + struct xdp_desc *descs = (struct xdp_desc *)tx->ring; + + return &descs[idx & tx->mask]; +} + +static inline const struct xdp_desc * +xsk_ring_cons__rx_desc(const struct xsk_ring_cons *rx, __u32 idx) +{ + const struct xdp_desc *descs = (const struct xdp_desc *)rx->ring; + + return &descs[idx & rx->mask]; +} + +static inline int xsk_ring_prod__needs_wakeup(const struct xsk_ring_prod *r) +{ + return *r->flags & XDP_RING_NEED_WAKEUP; +} + +static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb) +{ + __u32 free_entries = r->cached_cons - r->cached_prod; + + if (free_entries >= nb) + return free_entries; + + /* Refresh the local tail pointer. + * cached_cons is r->size bigger than the real consumer pointer so + * that this addition can be avoided in the more frequently + * executed code that computs free_entries in the beginning of + * this function. Without this optimization it whould have been + * free_entries = r->cached_prod - r->cached_cons + r->size. + */ + r->cached_cons = *r->consumer + r->size; + + return r->cached_cons - r->cached_prod; +} + +static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb) +{ + __u32 entries = r->cached_prod - r->cached_cons; + + if (entries == 0) { + r->cached_prod = *r->producer; + entries = r->cached_prod - r->cached_cons; + } + + return (entries > nb) ? nb : entries; +} + +static inline size_t xsk_ring_prod__reserve(struct xsk_ring_prod *prod, + size_t nb, __u32 *idx) +{ + if (xsk_prod_nb_free(prod, nb) < nb) + return 0; + + *idx = prod->cached_prod; + prod->cached_prod += nb; + + return nb; +} + +static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, size_t nb) +{ + /* Make sure everything has been written to the ring before indicating + * this to the kernel by writing the producer pointer. + */ + libbpf_smp_wmb(); + + *prod->producer += nb; +} + +static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons, + size_t nb, __u32 *idx) +{ + size_t entries = xsk_cons_nb_avail(cons, nb); + + if (entries > 0) { + /* Make sure we do not speculatively read the data before + * we have received the packet buffers from the ring. + */ + libbpf_smp_rmb(); + + *idx = cons->cached_cons; + cons->cached_cons += entries; + } + + return entries; +} + +static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, size_t nb) +{ + /* Make sure data has been read before indicating we are done + * with the entries by updating the consumer pointer. + */ + libbpf_smp_rwmb(); + + *cons->consumer += nb; +} + +static inline void *xsk_umem__get_data(void *umem_area, __u64 addr) +{ + return &((char *)umem_area)[addr]; +} + +static inline __u64 xsk_umem__extract_addr(__u64 addr) +{ + return addr & XSK_UNALIGNED_BUF_ADDR_MASK; +} + +static inline __u64 xsk_umem__extract_offset(__u64 addr) +{ + return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT; +} + +static inline __u64 xsk_umem__add_offset_to_addr(__u64 addr) +{ + return xsk_umem__extract_addr(addr) + xsk_umem__extract_offset(addr); +} + +LIBBPF_API int xsk_umem__fd(const struct xsk_umem *umem); +LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk); + +#define XSK_RING_CONS__DEFAULT_NUM_DESCS 2048 +#define XSK_RING_PROD__DEFAULT_NUM_DESCS 2048 +#define XSK_UMEM__DEFAULT_FRAME_SHIFT 12 /* 4096 bytes */ +#define XSK_UMEM__DEFAULT_FRAME_SIZE (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT) +#define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0 +#define XSK_UMEM__DEFAULT_FLAGS 0 + +struct xsk_umem_config { + __u32 fill_size; + __u32 comp_size; + __u32 frame_size; + __u32 frame_headroom; + __u32 flags; +}; + +/* Flags for the libbpf_flags field. */ +#define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0) + +struct xsk_socket_config { + __u32 rx_size; + __u32 tx_size; + __u32 libbpf_flags; + __u32 xdp_flags; + __u16 bind_flags; +}; + +/* Set config to NULL to get the default configuration. */ +LIBBPF_API int xsk_umem__create(struct xsk_umem **umem, + void *umem_area, __u64 size, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_umem_config *config); +LIBBPF_API int xsk_umem__create_v0_0_2(struct xsk_umem **umem, + void *umem_area, __u64 size, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_umem_config *config); +LIBBPF_API int xsk_umem__create_v0_0_4(struct xsk_umem **umem, + void *umem_area, __u64 size, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_umem_config *config); +LIBBPF_API int xsk_socket__create(struct xsk_socket **xsk, + const char *ifname, __u32 queue_id, + struct xsk_umem *umem, + struct xsk_ring_cons *rx, + struct xsk_ring_prod *tx, + const struct xsk_socket_config *config); + +/* Returns 0 for success and -EBUSY if the umem is still in use. */ +LIBBPF_API int xsk_umem__delete(struct xsk_umem *umem); +LIBBPF_API void xsk_socket__delete(struct xsk_socket *xsk); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __LIBBPF_XSK_H */ diff --git a/src/contrib/libbpf/include/asm/barrier.h b/src/contrib/libbpf/include/asm/barrier.h new file mode 100644 index 0000000..1fc6aee --- /dev/null +++ b/src/contrib/libbpf/include/asm/barrier.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __ASM_BARRIER_H +#define __ASM_BARRIER_H + +#include <linux/compiler.h> + +#endif diff --git a/src/contrib/libbpf/include/linux/compiler.h b/src/contrib/libbpf/include/linux/compiler.h new file mode 100644 index 0000000..26336dc --- /dev/null +++ b/src/contrib/libbpf/include/linux/compiler.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +#ifndef __LINUX_COMPILER_H +#define __LINUX_COMPILER_H + +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) + +#define READ_ONCE(x) (*(volatile typeof(x) *)&x) +#define WRITE_ONCE(x, v) (*(volatile typeof(x) *)&x) = (v) + +#define barrier() asm volatile("" ::: "memory") + +#if defined(__x86_64__) + +# define smp_rmb() barrier() +# define smp_wmb() barrier() +# define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc") + +# define smp_store_release(p, v) \ +do { \ + barrier(); \ + WRITE_ONCE(*p, v); \ +} while (0) + +# define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p = READ_ONCE(*p); \ + barrier(); \ + ___p; \ +}) + +#elif defined(__aarch64__) + +# define smp_rmb() asm volatile("dmb ishld" ::: "memory") +# define smp_wmb() asm volatile("dmb ishst" ::: "memory") +# define smp_mb() asm volatile("dmb ish" ::: "memory") + +#endif + +#ifndef smp_mb +# define smp_mb() __sync_synchronize() +#endif + +#ifndef smp_rmb +# define smp_rmb() smp_mb() +#endif + +#ifndef smp_wmb +# define smp_wmb() smp_mb() +#endif + +#ifndef smp_store_release +# define smp_store_release(p, v) \ +do { \ + smp_mb(); \ + WRITE_ONCE(*p, v); \ +} while (0) +#endif + +#ifndef smp_load_acquire +# define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p = READ_ONCE(*p); \ + smp_mb(); \ + ___p; \ +}) +#endif + +#endif /* __LINUX_COMPILER_H */ diff --git a/src/contrib/libbpf/include/linux/err.h b/src/contrib/libbpf/include/linux/err.h new file mode 100644 index 0000000..1b1dafb --- /dev/null +++ b/src/contrib/libbpf/include/linux/err.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +#ifndef __LINUX_ERR_H +#define __LINUX_ERR_H + +#include <linux/types.h> +#include <asm/errno.h> + +#define MAX_ERRNO 4095 + +#define IS_ERR_VALUE(x) ((x) >= (unsigned long)-MAX_ERRNO) + +static inline void * ERR_PTR(long error_) +{ + return (void *) error_; +} + +static inline long PTR_ERR(const void *ptr) +{ + return (long) ptr; +} + +static inline bool IS_ERR(const void *ptr) +{ + return IS_ERR_VALUE((unsigned long)ptr); +} + +static inline bool IS_ERR_OR_NULL(const void *ptr) +{ + return (!ptr) || IS_ERR_VALUE((unsigned long)ptr); +} + +static inline long PTR_ERR_OR_ZERO(const void *ptr) +{ + return IS_ERR(ptr) ? PTR_ERR(ptr) : 0; +} + +#endif diff --git a/src/contrib/libbpf/include/linux/filter.h b/src/contrib/libbpf/include/linux/filter.h new file mode 100644 index 0000000..b0700e2 --- /dev/null +++ b/src/contrib/libbpf/include/linux/filter.h @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +#ifndef __LINUX_FILTER_H +#define __LINUX_FILTER_H + +#include <linux/bpf.h> + +#define BPF_ALU64_IMM(OP, DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +#define BPF_MOV64_IMM(DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +#define BPF_EXIT_INSN() \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_EXIT, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = 0 }) + +#define BPF_EMIT_CALL(FUNC) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_CALL, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = ((FUNC) - BPF_FUNC_unspec) }) + +#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + +#define BPF_MOV64_REG(DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +#define BPF_MOV32_IMM(DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_MOV | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +#define BPF_LD_IMM64_RAW_FULL(DST, SRC, OFF1, OFF2, IMM1, IMM2) \ + ((struct bpf_insn) { \ + .code = BPF_LD | BPF_DW | BPF_IMM, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF1, \ + .imm = IMM1 }), \ + ((struct bpf_insn) { \ + .code = 0, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = OFF2, \ + .imm = IMM2 }) + +#define BPF_LD_MAP_FD(DST, MAP_FD) \ + BPF_LD_IMM64_RAW_FULL(DST, BPF_PSEUDO_MAP_FD, 0, 0, \ + MAP_FD, 0) + +#define BPF_LD_MAP_VALUE(DST, MAP_FD, VALUE_OFF) \ + BPF_LD_IMM64_RAW_FULL(DST, BPF_PSEUDO_MAP_VALUE, 0, 0, \ + MAP_FD, VALUE_OFF) + +#define BPF_JMP_IMM(OP, DST, IMM, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + +#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + +#endif diff --git a/src/contrib/libbpf/include/linux/kernel.h b/src/contrib/libbpf/include/linux/kernel.h new file mode 100644 index 0000000..a4a7a9d --- /dev/null +++ b/src/contrib/libbpf/include/linux/kernel.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +#ifndef __LINUX_KERNEL_H +#define __LINUX_KERNEL_H + +#ifndef offsetof +#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) +#endif + +#ifndef container_of +#define container_of(ptr, type, member) ({ \ + const typeof(((type *)0)->member) * __mptr = (ptr); \ + (type *)((char *)__mptr - offsetof(type, member)); }) +#endif + +#ifndef max +#define max(x, y) ({ \ + typeof(x) _max1 = (x); \ + typeof(y) _max2 = (y); \ + (void) (&_max1 == &_max2); \ + _max1 > _max2 ? _max1 : _max2; }) +#endif + +#ifndef min +#define min(x, y) ({ \ + typeof(x) _min1 = (x); \ + typeof(y) _min2 = (y); \ + (void) (&_min1 == &_min2); \ + _min1 < _min2 ? _min1 : _min2; }) +#endif + +#ifndef roundup +#define roundup(x, y) ( \ +{ \ + const typeof(y) __y = y; \ + (((x) + (__y - 1)) / __y) * __y; \ +} \ +) +#endif + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) +#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) + +#endif diff --git a/src/contrib/libbpf/include/linux/list.h b/src/contrib/libbpf/include/linux/list.h new file mode 100644 index 0000000..e3814f7 --- /dev/null +++ b/src/contrib/libbpf/include/linux/list.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +#ifndef __LINUX_LIST_H +#define __LINUX_LIST_H + +#define LIST_HEAD_INIT(name) { &(name), &(name) } +#define LIST_HEAD(name) \ + struct list_head name = LIST_HEAD_INIT(name) + +#define POISON_POINTER_DELTA 0 +#define LIST_POISON1 ((void *) 0x100 + POISON_POINTER_DELTA) +#define LIST_POISON2 ((void *) 0x200 + POISON_POINTER_DELTA) + + +static inline void INIT_LIST_HEAD(struct list_head *list) +{ + list->next = list; + list->prev = list; +} + +static inline void __list_add(struct list_head *new, + struct list_head *prev, + struct list_head *next) +{ + next->prev = new; + new->next = next; + new->prev = prev; + prev->next = new; +} + +/** + * list_add - add a new entry + * @new: new entry to be added + * @head: list head to add it after + * + * Insert a new entry after the specified head. + * This is good for implementing stacks. + */ +static inline void list_add(struct list_head *new, struct list_head *head) +{ + __list_add(new, head, head->next); +} + +/* + * Delete a list entry by making the prev/next entries + * point to each other. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static inline void __list_del(struct list_head * prev, struct list_head * next) +{ + next->prev = prev; + prev->next = next; +} + +/** + * list_del - deletes entry from list. + * @entry: the element to delete from the list. + * Note: list_empty() on entry does not return true after this, the entry is + * in an undefined state. + */ +static inline void __list_del_entry(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); +} + +static inline void list_del(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + entry->next = LIST_POISON1; + entry->prev = LIST_POISON2; +} + +#define list_entry(ptr, type, member) \ + container_of(ptr, type, member) +#define list_first_entry(ptr, type, member) \ + list_entry((ptr)->next, type, member) +#define list_next_entry(pos, member) \ + list_entry((pos)->member.next, typeof(*(pos)), member) + +#endif diff --git a/src/contrib/libbpf/include/linux/overflow.h b/src/contrib/libbpf/include/linux/overflow.h new file mode 100644 index 0000000..53d7580 --- /dev/null +++ b/src/contrib/libbpf/include/linux/overflow.h @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +#ifndef __LINUX_OVERFLOW_H +#define __LINUX_OVERFLOW_H + +#define is_signed_type(type) (((type)(-1)) < (type)1) +#define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type))) +#define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T))) +#define type_min(T) ((T)((T)-type_max(T)-(T)1)) + +#ifndef unlikely +#define unlikely(x) __builtin_expect(!!(x), 0) +#endif + +#ifdef __GNUC__ +#define GCC_VERSION (__GNUC__ * 10000 \ + + __GNUC_MINOR__ * 100 \ + + __GNUC_PATCHLEVEL__) +#if GCC_VERSION >= 50100 +#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 +#endif +#endif + +#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW + +#define check_mul_overflow(a, b, d) ({ \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + typeof(d) __d = (d); \ + (void) (&__a == &__b); \ + (void) (&__a == __d); \ + __builtin_mul_overflow(__a, __b, __d); \ +}) + +#else + +/* + * If one of a or b is a compile-time constant, this avoids a division. + */ +#define __unsigned_mul_overflow(a, b, d) ({ \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + typeof(d) __d = (d); \ + (void) (&__a == &__b); \ + (void) (&__a == __d); \ + *__d = __a * __b; \ + __builtin_constant_p(__b) ? \ + __b > 0 && __a > type_max(typeof(__a)) / __b : \ + __a > 0 && __b > type_max(typeof(__b)) / __a; \ +}) + +/* + * Signed multiplication is rather hard. gcc always follows C99, so + * division is truncated towards 0. This means that we can write the + * overflow check like this: + * + * (a > 0 && (b > MAX/a || b < MIN/a)) || + * (a < -1 && (b > MIN/a || b < MAX/a) || + * (a == -1 && b == MIN) + * + * The redundant casts of -1 are to silence an annoying -Wtype-limits + * (included in -Wextra) warning: When the type is u8 or u16, the + * __b_c_e in check_mul_overflow obviously selects + * __unsigned_mul_overflow, but unfortunately gcc still parses this + * code and warns about the limited range of __b. + */ + +#define __signed_mul_overflow(a, b, d) ({ \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + typeof(d) __d = (d); \ + typeof(a) __tmax = type_max(typeof(a)); \ + typeof(a) __tmin = type_min(typeof(a)); \ + (void) (&__a == &__b); \ + (void) (&__a == __d); \ + *__d = (__u64)__a * (__u64)__b; \ + (__b > 0 && (__a > __tmax/__b || __a < __tmin/__b)) || \ + (__b < (typeof(__b))-1 && (__a > __tmin/__b || __a < __tmax/__b)) || \ + (__b == (typeof(__b))-1 && __a == __tmin); \ +}) + +#define check_mul_overflow(a, b, d) \ + __builtin_choose_expr(is_signed_type(typeof(a)), \ + __signed_mul_overflow(a, b, d), \ + __unsigned_mul_overflow(a, b, d)) + + +#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */ + +#endif diff --git a/src/contrib/libbpf/include/linux/ring_buffer.h b/src/contrib/libbpf/include/linux/ring_buffer.h new file mode 100644 index 0000000..fc4677b --- /dev/null +++ b/src/contrib/libbpf/include/linux/ring_buffer.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef _TOOLS_LINUX_RING_BUFFER_H_ +#define _TOOLS_LINUX_RING_BUFFER_H_ + +#include <linux/compiler.h> + +static inline __u64 ring_buffer_read_head(struct perf_event_mmap_page *base) +{ + return smp_load_acquire(&base->data_head); +} + +static inline void ring_buffer_write_tail(struct perf_event_mmap_page *base, + __u64 tail) +{ + smp_store_release(&base->data_tail, tail); +} + +#endif /* _TOOLS_LINUX_RING_BUFFER_H_ */ diff --git a/src/contrib/libbpf/include/linux/types.h b/src/contrib/libbpf/include/linux/types.h new file mode 100644 index 0000000..bae1ed8 --- /dev/null +++ b/src/contrib/libbpf/include/linux/types.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +#ifndef __LINUX_TYPES_H +#define __LINUX_TYPES_H + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +#include <asm/types.h> +#include <asm/posix_types.h> + +#define __bitwise__ +#define __bitwise __bitwise__ + +typedef __u16 __bitwise __le16; +typedef __u16 __bitwise __be16; +typedef __u32 __bitwise __le32; +typedef __u32 __bitwise __be32; +typedef __u64 __bitwise __le64; +typedef __u64 __bitwise __be64; + +#ifndef __aligned_u64 +# define __aligned_u64 __u64 __attribute__((aligned(8))) +#endif + +struct list_head { + struct list_head *next, *prev; +}; + +#endif diff --git a/src/contrib/libbpf/include/uapi/linux/bpf.h b/src/contrib/libbpf/include/uapi/linux/bpf.h new file mode 100644 index 0000000..dbbcf0b --- /dev/null +++ b/src/contrib/libbpf/include/uapi/linux/bpf.h @@ -0,0 +1,3692 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#ifndef _UAPI__LINUX_BPF_H__ +#define _UAPI__LINUX_BPF_H__ + +#include <linux/types.h> +#include <linux/bpf_common.h> + +/* Extended instruction set based on top of classic BPF */ + +/* instruction classes */ +#define BPF_JMP32 0x06 /* jmp mode in word width */ +#define BPF_ALU64 0x07 /* alu mode in double word width */ + +/* ld/ldx fields */ +#define BPF_DW 0x18 /* double word (64-bit) */ +#define BPF_XADD 0xc0 /* exclusive add */ + +/* alu/jmp fields */ +#define BPF_MOV 0xb0 /* mov reg to reg */ +#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */ + +/* change endianness of a register */ +#define BPF_END 0xd0 /* flags for endianness conversion: */ +#define BPF_TO_LE 0x00 /* convert to little-endian */ +#define BPF_TO_BE 0x08 /* convert to big-endian */ +#define BPF_FROM_LE BPF_TO_LE +#define BPF_FROM_BE BPF_TO_BE + +/* jmp encodings */ +#define BPF_JNE 0x50 /* jump != */ +#define BPF_JLT 0xa0 /* LT is unsigned, '<' */ +#define BPF_JLE 0xb0 /* LE is unsigned, '<=' */ +#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ +#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ +#define BPF_JSLT 0xc0 /* SLT is signed, '<' */ +#define BPF_JSLE 0xd0 /* SLE is signed, '<=' */ +#define BPF_CALL 0x80 /* function call */ +#define BPF_EXIT 0x90 /* function return */ + +/* Register numbers */ +enum { + BPF_REG_0 = 0, + BPF_REG_1, + BPF_REG_2, + BPF_REG_3, + BPF_REG_4, + BPF_REG_5, + BPF_REG_6, + BPF_REG_7, + BPF_REG_8, + BPF_REG_9, + BPF_REG_10, + __MAX_BPF_REG, +}; + +/* BPF has 10 general purpose 64-bit registers and stack frame. */ +#define MAX_BPF_REG __MAX_BPF_REG + +struct bpf_insn { + __u8 code; /* opcode */ + __u8 dst_reg:4; /* dest register */ + __u8 src_reg:4; /* source register */ + __s16 off; /* signed offset */ + __s32 imm; /* signed immediate constant */ +}; + +/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */ +struct bpf_lpm_trie_key { + __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */ + __u8 data[0]; /* Arbitrary size */ +}; + +struct bpf_cgroup_storage_key { + __u64 cgroup_inode_id; /* cgroup inode id */ + __u32 attach_type; /* program attach type */ +}; + +/* BPF syscall commands, see bpf(2) man-page for details. */ +enum bpf_cmd { + BPF_MAP_CREATE, + BPF_MAP_LOOKUP_ELEM, + BPF_MAP_UPDATE_ELEM, + BPF_MAP_DELETE_ELEM, + BPF_MAP_GET_NEXT_KEY, + BPF_PROG_LOAD, + BPF_OBJ_PIN, + BPF_OBJ_GET, + BPF_PROG_ATTACH, + BPF_PROG_DETACH, + BPF_PROG_TEST_RUN, + BPF_PROG_GET_NEXT_ID, + BPF_MAP_GET_NEXT_ID, + BPF_PROG_GET_FD_BY_ID, + BPF_MAP_GET_FD_BY_ID, + BPF_OBJ_GET_INFO_BY_FD, + BPF_PROG_QUERY, + BPF_RAW_TRACEPOINT_OPEN, + BPF_BTF_LOAD, + BPF_BTF_GET_FD_BY_ID, + BPF_TASK_FD_QUERY, + BPF_MAP_LOOKUP_AND_DELETE_ELEM, + BPF_MAP_FREEZE, + BPF_BTF_GET_NEXT_ID, +}; + +enum bpf_map_type { + BPF_MAP_TYPE_UNSPEC, + BPF_MAP_TYPE_HASH, + BPF_MAP_TYPE_ARRAY, + BPF_MAP_TYPE_PROG_ARRAY, + BPF_MAP_TYPE_PERF_EVENT_ARRAY, + BPF_MAP_TYPE_PERCPU_HASH, + BPF_MAP_TYPE_PERCPU_ARRAY, + BPF_MAP_TYPE_STACK_TRACE, + BPF_MAP_TYPE_CGROUP_ARRAY, + BPF_MAP_TYPE_LRU_HASH, + BPF_MAP_TYPE_LRU_PERCPU_HASH, + BPF_MAP_TYPE_LPM_TRIE, + BPF_MAP_TYPE_ARRAY_OF_MAPS, + BPF_MAP_TYPE_HASH_OF_MAPS, + BPF_MAP_TYPE_DEVMAP, + BPF_MAP_TYPE_SOCKMAP, + BPF_MAP_TYPE_CPUMAP, + BPF_MAP_TYPE_XSKMAP, + BPF_MAP_TYPE_SOCKHASH, + BPF_MAP_TYPE_CGROUP_STORAGE, + BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, + BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, + BPF_MAP_TYPE_QUEUE, + BPF_MAP_TYPE_STACK, + BPF_MAP_TYPE_SK_STORAGE, + BPF_MAP_TYPE_DEVMAP_HASH, +}; + +/* Note that tracing related programs such as + * BPF_PROG_TYPE_{KPROBE,TRACEPOINT,PERF_EVENT,RAW_TRACEPOINT} + * are not subject to a stable API since kernel internal data + * structures can change from release to release and may + * therefore break existing tracing BPF programs. Tracing BPF + * programs correspond to /a/ specific kernel which is to be + * analyzed, and not /a/ specific kernel /and/ all future ones. + */ +enum bpf_prog_type { + BPF_PROG_TYPE_UNSPEC, + BPF_PROG_TYPE_SOCKET_FILTER, + BPF_PROG_TYPE_KPROBE, + BPF_PROG_TYPE_SCHED_CLS, + BPF_PROG_TYPE_SCHED_ACT, + BPF_PROG_TYPE_TRACEPOINT, + BPF_PROG_TYPE_XDP, + BPF_PROG_TYPE_PERF_EVENT, + BPF_PROG_TYPE_CGROUP_SKB, + BPF_PROG_TYPE_CGROUP_SOCK, + BPF_PROG_TYPE_LWT_IN, + BPF_PROG_TYPE_LWT_OUT, + BPF_PROG_TYPE_LWT_XMIT, + BPF_PROG_TYPE_SOCK_OPS, + BPF_PROG_TYPE_SK_SKB, + BPF_PROG_TYPE_CGROUP_DEVICE, + BPF_PROG_TYPE_SK_MSG, + BPF_PROG_TYPE_RAW_TRACEPOINT, + BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_PROG_TYPE_LWT_SEG6LOCAL, + BPF_PROG_TYPE_LIRC_MODE2, + BPF_PROG_TYPE_SK_REUSEPORT, + BPF_PROG_TYPE_FLOW_DISSECTOR, + BPF_PROG_TYPE_CGROUP_SYSCTL, + BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, + BPF_PROG_TYPE_CGROUP_SOCKOPT, + BPF_PROG_TYPE_TRACING, +}; + +enum bpf_attach_type { + BPF_CGROUP_INET_INGRESS, + BPF_CGROUP_INET_EGRESS, + BPF_CGROUP_INET_SOCK_CREATE, + BPF_CGROUP_SOCK_OPS, + BPF_SK_SKB_STREAM_PARSER, + BPF_SK_SKB_STREAM_VERDICT, + BPF_CGROUP_DEVICE, + BPF_SK_MSG_VERDICT, + BPF_CGROUP_INET4_BIND, + BPF_CGROUP_INET6_BIND, + BPF_CGROUP_INET4_CONNECT, + BPF_CGROUP_INET6_CONNECT, + BPF_CGROUP_INET4_POST_BIND, + BPF_CGROUP_INET6_POST_BIND, + BPF_CGROUP_UDP4_SENDMSG, + BPF_CGROUP_UDP6_SENDMSG, + BPF_LIRC_MODE2, + BPF_FLOW_DISSECTOR, + BPF_CGROUP_SYSCTL, + BPF_CGROUP_UDP4_RECVMSG, + BPF_CGROUP_UDP6_RECVMSG, + BPF_CGROUP_GETSOCKOPT, + BPF_CGROUP_SETSOCKOPT, + BPF_TRACE_RAW_TP, + BPF_TRACE_FENTRY, + BPF_TRACE_FEXIT, + __MAX_BPF_ATTACH_TYPE +}; + +#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE + +/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command + * + * NONE(default): No further bpf programs allowed in the subtree. + * + * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program, + * the program in this cgroup yields to sub-cgroup program. + * + * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program, + * that cgroup program gets run in addition to the program in this cgroup. + * + * Only one program is allowed to be attached to a cgroup with + * NONE or BPF_F_ALLOW_OVERRIDE flag. + * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will + * release old program and attach the new one. Attach flags has to match. + * + * Multiple programs are allowed to be attached to a cgroup with + * BPF_F_ALLOW_MULTI flag. They are executed in FIFO order + * (those that were attached first, run first) + * The programs of sub-cgroup are executed first, then programs of + * this cgroup and then programs of parent cgroup. + * When children program makes decision (like picking TCP CA or sock bind) + * parent program has a chance to override it. + * + * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups. + * A cgroup with NONE doesn't allow any programs in sub-cgroups. + * Ex1: + * cgrp1 (MULTI progs A, B) -> + * cgrp2 (OVERRIDE prog C) -> + * cgrp3 (MULTI prog D) -> + * cgrp4 (OVERRIDE prog E) -> + * cgrp5 (NONE prog F) + * the event in cgrp5 triggers execution of F,D,A,B in that order. + * if prog F is detached, the execution is E,D,A,B + * if prog F and D are detached, the execution is E,A,B + * if prog F, E and D are detached, the execution is C,A,B + * + * All eligible programs are executed regardless of return code from + * earlier programs. + */ +#define BPF_F_ALLOW_OVERRIDE (1U << 0) +#define BPF_F_ALLOW_MULTI (1U << 1) + +/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the + * verifier will perform strict alignment checking as if the kernel + * has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set, + * and NET_IP_ALIGN defined to 2. + */ +#define BPF_F_STRICT_ALIGNMENT (1U << 0) + +/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the + * verifier will allow any alignment whatsoever. On platforms + * with strict alignment requirements for loads ands stores (such + * as sparc and mips) the verifier validates that all loads and + * stores provably follow this requirement. This flag turns that + * checking and enforcement off. + * + * It is mostly used for testing when we want to validate the + * context and memory access aspects of the verifier, but because + * of an unaligned access the alignment check would trigger before + * the one we are interested in. + */ +#define BPF_F_ANY_ALIGNMENT (1U << 1) + +/* BPF_F_TEST_RND_HI32 is used in BPF_PROG_LOAD command for testing purpose. + * Verifier does sub-register def/use analysis and identifies instructions whose + * def only matters for low 32-bit, high 32-bit is never referenced later + * through implicit zero extension. Therefore verifier notifies JIT back-ends + * that it is safe to ignore clearing high 32-bit for these instructions. This + * saves some back-ends a lot of code-gen. However such optimization is not + * necessary on some arches, for example x86_64, arm64 etc, whose JIT back-ends + * hence hasn't used verifier's analysis result. But, we really want to have a + * way to be able to verify the correctness of the described optimization on + * x86_64 on which testsuites are frequently exercised. + * + * So, this flag is introduced. Once it is set, verifier will randomize high + * 32-bit for those instructions who has been identified as safe to ignore them. + * Then, if verifier is not doing correct analysis, such randomization will + * regress tests to expose bugs. + */ +#define BPF_F_TEST_RND_HI32 (1U << 2) + +/* The verifier internal test flag. Behavior is undefined */ +#define BPF_F_TEST_STATE_FREQ (1U << 3) + +/* When BPF ldimm64's insn[0].src_reg != 0 then this can have + * two extensions: + * + * insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE + * insn[0].imm: map fd map fd + * insn[1].imm: 0 offset into value + * insn[0].off: 0 0 + * insn[1].off: 0 0 + * ldimm64 rewrite: address of map address of map[0]+offset + * verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE + */ +#define BPF_PSEUDO_MAP_FD 1 +#define BPF_PSEUDO_MAP_VALUE 2 + +/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative + * offset to another bpf function + */ +#define BPF_PSEUDO_CALL 1 + +/* flags for BPF_MAP_UPDATE_ELEM command */ +#define BPF_ANY 0 /* create new element or update existing */ +#define BPF_NOEXIST 1 /* create new element if it didn't exist */ +#define BPF_EXIST 2 /* update existing element */ +#define BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */ + +/* flags for BPF_MAP_CREATE command */ +#define BPF_F_NO_PREALLOC (1U << 0) +/* Instead of having one common LRU list in the + * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list + * which can scale and perform better. + * Note, the LRU nodes (including free nodes) cannot be moved + * across different LRU lists. + */ +#define BPF_F_NO_COMMON_LRU (1U << 1) +/* Specify numa node during map creation */ +#define BPF_F_NUMA_NODE (1U << 2) + +#define BPF_OBJ_NAME_LEN 16U + +/* Flags for accessing BPF object from syscall side. */ +#define BPF_F_RDONLY (1U << 3) +#define BPF_F_WRONLY (1U << 4) + +/* Flag for stack_map, store build_id+offset instead of pointer */ +#define BPF_F_STACK_BUILD_ID (1U << 5) + +/* Zero-initialize hash function seed. This should only be used for testing. */ +#define BPF_F_ZERO_SEED (1U << 6) + +/* Flags for accessing BPF object from program side. */ +#define BPF_F_RDONLY_PROG (1U << 7) +#define BPF_F_WRONLY_PROG (1U << 8) + +/* Clone map from listener for newly accepted socket */ +#define BPF_F_CLONE (1U << 9) + +/* Enable memory-mapping BPF map */ +#define BPF_F_MMAPABLE (1U << 10) + +/* flags for BPF_PROG_QUERY */ +#define BPF_F_QUERY_EFFECTIVE (1U << 0) + +enum bpf_stack_build_id_status { + /* user space need an empty entry to identify end of a trace */ + BPF_STACK_BUILD_ID_EMPTY = 0, + /* with valid build_id and offset */ + BPF_STACK_BUILD_ID_VALID = 1, + /* couldn't get build_id, fallback to ip */ + BPF_STACK_BUILD_ID_IP = 2, +}; + +#define BPF_BUILD_ID_SIZE 20 +struct bpf_stack_build_id { + __s32 status; + unsigned char build_id[BPF_BUILD_ID_SIZE]; + union { + __u64 offset; + __u64 ip; + }; +}; + +union bpf_attr { + struct { /* anonymous struct used by BPF_MAP_CREATE command */ + __u32 map_type; /* one of enum bpf_map_type */ + __u32 key_size; /* size of key in bytes */ + __u32 value_size; /* size of value in bytes */ + __u32 max_entries; /* max number of entries in a map */ + __u32 map_flags; /* BPF_MAP_CREATE related + * flags defined above. + */ + __u32 inner_map_fd; /* fd pointing to the inner map */ + __u32 numa_node; /* numa node (effective only if + * BPF_F_NUMA_NODE is set). + */ + char map_name[BPF_OBJ_NAME_LEN]; + __u32 map_ifindex; /* ifindex of netdev to create on */ + __u32 btf_fd; /* fd pointing to a BTF type data */ + __u32 btf_key_type_id; /* BTF type_id of the key */ + __u32 btf_value_type_id; /* BTF type_id of the value */ + }; + + struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ + __u32 map_fd; + __aligned_u64 key; + union { + __aligned_u64 value; + __aligned_u64 next_key; + }; + __u64 flags; + }; + + struct { /* anonymous struct used by BPF_PROG_LOAD command */ + __u32 prog_type; /* one of enum bpf_prog_type */ + __u32 insn_cnt; + __aligned_u64 insns; + __aligned_u64 license; + __u32 log_level; /* verbosity level of verifier */ + __u32 log_size; /* size of user buffer */ + __aligned_u64 log_buf; /* user supplied buffer */ + __u32 kern_version; /* not used */ + __u32 prog_flags; + char prog_name[BPF_OBJ_NAME_LEN]; + __u32 prog_ifindex; /* ifindex of netdev to prep for */ + /* For some prog types expected attach type must be known at + * load time to verify attach type specific parts of prog + * (context accesses, allowed helpers, etc). + */ + __u32 expected_attach_type; + __u32 prog_btf_fd; /* fd pointing to BTF type data */ + __u32 func_info_rec_size; /* userspace bpf_func_info size */ + __aligned_u64 func_info; /* func info */ + __u32 func_info_cnt; /* number of bpf_func_info records */ + __u32 line_info_rec_size; /* userspace bpf_line_info size */ + __aligned_u64 line_info; /* line info */ + __u32 line_info_cnt; /* number of bpf_line_info records */ + __u32 attach_btf_id; /* in-kernel BTF type id to attach to */ + __u32 attach_prog_fd; /* 0 to attach to vmlinux */ + }; + + struct { /* anonymous struct used by BPF_OBJ_* commands */ + __aligned_u64 pathname; + __u32 bpf_fd; + __u32 file_flags; + }; + + struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */ + __u32 target_fd; /* container object to attach to */ + __u32 attach_bpf_fd; /* eBPF program to attach */ + __u32 attach_type; + __u32 attach_flags; + }; + + struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ + __u32 prog_fd; + __u32 retval; + __u32 data_size_in; /* input: len of data_in */ + __u32 data_size_out; /* input/output: len of data_out + * returns ENOSPC if data_out + * is too small. + */ + __aligned_u64 data_in; + __aligned_u64 data_out; + __u32 repeat; + __u32 duration; + __u32 ctx_size_in; /* input: len of ctx_in */ + __u32 ctx_size_out; /* input/output: len of ctx_out + * returns ENOSPC if ctx_out + * is too small. + */ + __aligned_u64 ctx_in; + __aligned_u64 ctx_out; + } test; + + struct { /* anonymous struct used by BPF_*_GET_*_ID */ + union { + __u32 start_id; + __u32 prog_id; + __u32 map_id; + __u32 btf_id; + }; + __u32 next_id; + __u32 open_flags; + }; + + struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */ + __u32 bpf_fd; + __u32 info_len; + __aligned_u64 info; + } info; + + struct { /* anonymous struct used by BPF_PROG_QUERY command */ + __u32 target_fd; /* container object to query */ + __u32 attach_type; + __u32 query_flags; + __u32 attach_flags; + __aligned_u64 prog_ids; + __u32 prog_cnt; + } query; + + struct { + __u64 name; + __u32 prog_fd; + } raw_tracepoint; + + struct { /* anonymous struct for BPF_BTF_LOAD */ + __aligned_u64 btf; + __aligned_u64 btf_log_buf; + __u32 btf_size; + __u32 btf_log_size; + __u32 btf_log_level; + }; + + struct { + __u32 pid; /* input: pid */ + __u32 fd; /* input: fd */ + __u32 flags; /* input: flags */ + __u32 buf_len; /* input/output: buf len */ + __aligned_u64 buf; /* input/output: + * tp_name for tracepoint + * symbol for kprobe + * filename for uprobe + */ + __u32 prog_id; /* output: prod_id */ + __u32 fd_type; /* output: BPF_FD_TYPE_* */ + __u64 probe_offset; /* output: probe_offset */ + __u64 probe_addr; /* output: probe_addr */ + } task_fd_query; +} __attribute__((aligned(8))); + +/* The description below is an attempt at providing documentation to eBPF + * developers about the multiple available eBPF helper functions. It can be + * parsed and used to produce a manual page. The workflow is the following, + * and requires the rst2man utility: + * + * $ ./scripts/bpf_helpers_doc.py \ + * --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst + * $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7 + * $ man /tmp/bpf-helpers.7 + * + * Note that in order to produce this external documentation, some RST + * formatting is used in the descriptions to get "bold" and "italics" in + * manual pages. Also note that the few trailing white spaces are + * intentional, removing them would break paragraphs for rst2man. + * + * Start of BPF helper function descriptions: + * + * void *bpf_map_lookup_elem(struct bpf_map *map, const void *key) + * Description + * Perform a lookup in *map* for an entry associated to *key*. + * Return + * Map value associated to *key*, or **NULL** if no entry was + * found. + * + * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags) + * Description + * Add or update the value of the entry associated to *key* in + * *map* with *value*. *flags* is one of: + * + * **BPF_NOEXIST** + * The entry for *key* must not exist in the map. + * **BPF_EXIST** + * The entry for *key* must already exist in the map. + * **BPF_ANY** + * No condition on the existence of the entry for *key*. + * + * Flag value **BPF_NOEXIST** cannot be used for maps of types + * **BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY** (all + * elements always exist), the helper would return an error. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_delete_elem(struct bpf_map *map, const void *key) + * Description + * Delete entry with *key* from *map*. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr) + * Description + * For tracing programs, safely attempt to read *size* bytes from + * kernel space address *unsafe_ptr* and store the data in *dst*. + * + * Generally, use bpf_probe_read_user() or bpf_probe_read_kernel() + * instead. + * Return + * 0 on success, or a negative error in case of failure. + * + * u64 bpf_ktime_get_ns(void) + * Description + * Return the time elapsed since system boot, in nanoseconds. + * Return + * Current *ktime*. + * + * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...) + * Description + * This helper is a "printk()-like" facility for debugging. It + * prints a message defined by format *fmt* (of size *fmt_size*) + * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if + * available. It can take up to three additional **u64** + * arguments (as an eBPF helpers, the total number of arguments is + * limited to five). + * + * Each time the helper is called, it appends a line to the trace. + * Lines are discarded while *\/sys/kernel/debug/tracing/trace* is + * open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this. + * The format of the trace is customizable, and the exact output + * one will get depends on the options set in + * *\/sys/kernel/debug/tracing/trace_options* (see also the + * *README* file under the same directory). However, it usually + * defaults to something like: + * + * :: + * + * telnet-470 [001] .N.. 419421.045894: 0x00000001: <formatted msg> + * + * In the above: + * + * * ``telnet`` is the name of the current task. + * * ``470`` is the PID of the current task. + * * ``001`` is the CPU number on which the task is + * running. + * * In ``.N..``, each character refers to a set of + * options (whether irqs are enabled, scheduling + * options, whether hard/softirqs are running, level of + * preempt_disabled respectively). **N** means that + * **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED** + * are set. + * * ``419421.045894`` is a timestamp. + * * ``0x00000001`` is a fake value used by BPF for the + * instruction pointer register. + * * ``<formatted msg>`` is the message formatted with + * *fmt*. + * + * The conversion specifiers supported by *fmt* are similar, but + * more limited than for printk(). They are **%d**, **%i**, + * **%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**, + * **%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size + * of field, padding with zeroes, etc.) is available, and the + * helper will return **-EINVAL** (but print nothing) if it + * encounters an unknown specifier. + * + * Also, note that **bpf_trace_printk**\ () is slow, and should + * only be used for debugging purposes. For this reason, a notice + * bloc (spanning several lines) is printed to kernel logs and + * states that the helper should not be used "for production use" + * the first time this helper is used (or more precisely, when + * **trace_printk**\ () buffers are allocated). For passing values + * to user space, perf events should be preferred. + * Return + * The number of bytes written to the buffer, or a negative error + * in case of failure. + * + * u32 bpf_get_prandom_u32(void) + * Description + * Get a pseudo-random number. + * + * From a security point of view, this helper uses its own + * pseudo-random internal state, and cannot be used to infer the + * seed of other random functions in the kernel. However, it is + * essential to note that the generator used by the helper is not + * cryptographically secure. + * Return + * A random 32-bit unsigned value. + * + * u32 bpf_get_smp_processor_id(void) + * Description + * Get the SMP (symmetric multiprocessing) processor id. Note that + * all programs run with preemption disabled, which means that the + * SMP processor id is stable during all the execution of the + * program. + * Return + * The SMP id of the processor running the program. + * + * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) + * Description + * Store *len* bytes from address *from* into the packet + * associated to *skb*, at *offset*. *flags* are a combination of + * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the + * checksum for the packet after storing the bytes) and + * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\ + * **->swhash** and *skb*\ **->l4hash** to 0). + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size) + * Description + * Recompute the layer 3 (e.g. IP) checksum for the packet + * associated to *skb*. Computation is incremental, so the helper + * must know the former value of the header field that was + * modified (*from*), the new value of this field (*to*), and the + * number of bytes (2 or 4) for this field, stored in *size*. + * Alternatively, it is possible to store the difference between + * the previous and the new values of the header field in *to*, by + * setting *from* and *size* to 0. For both methods, *offset* + * indicates the location of the IP checksum within the packet. + * + * This helper works in combination with **bpf_csum_diff**\ (), + * which does not update the checksum in-place, but offers more + * flexibility and can handle sizes larger than 2 or 4 for the + * checksum to update. + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags) + * Description + * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the + * packet associated to *skb*. Computation is incremental, so the + * helper must know the former value of the header field that was + * modified (*from*), the new value of this field (*to*), and the + * number of bytes (2 or 4) for this field, stored on the lowest + * four bits of *flags*. Alternatively, it is possible to store + * the difference between the previous and the new values of the + * header field in *to*, by setting *from* and the four lowest + * bits of *flags* to 0. For both methods, *offset* indicates the + * location of the IP checksum within the packet. In addition to + * the size of the field, *flags* can be added (bitwise OR) actual + * flags. With **BPF_F_MARK_MANGLED_0**, a null checksum is left + * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and + * for updates resulting in a null checksum the value is set to + * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates + * the checksum is to be computed against a pseudo-header. + * + * This helper works in combination with **bpf_csum_diff**\ (), + * which does not update the checksum in-place, but offers more + * flexibility and can handle sizes larger than 2 or 4 for the + * checksum to update. + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index) + * Description + * This special helper is used to trigger a "tail call", or in + * other words, to jump into another eBPF program. The same stack + * frame is used (but values on stack and in registers for the + * caller are not accessible to the callee). This mechanism allows + * for program chaining, either for raising the maximum number of + * available eBPF instructions, or to execute given programs in + * conditional blocks. For security reasons, there is an upper + * limit to the number of successive tail calls that can be + * performed. + * + * Upon call of this helper, the program attempts to jump into a + * program referenced at index *index* in *prog_array_map*, a + * special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes + * *ctx*, a pointer to the context. + * + * If the call succeeds, the kernel immediately runs the first + * instruction of the new program. This is not a function call, + * and it never returns to the previous program. If the call + * fails, then the helper has no effect, and the caller continues + * to run its subsequent instructions. A call can fail if the + * destination program for the jump does not exist (i.e. *index* + * is superior to the number of entries in *prog_array_map*), or + * if the maximum number of tail calls has been reached for this + * chain of programs. This limit is defined in the kernel by the + * macro **MAX_TAIL_CALL_CNT** (not accessible to user space), + * which is currently set to 32. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags) + * Description + * Clone and redirect the packet associated to *skb* to another + * net device of index *ifindex*. Both ingress and egress + * interfaces can be used for redirection. The **BPF_F_INGRESS** + * value in *flags* is used to make the distinction (ingress path + * is selected if the flag is present, egress path otherwise). + * This is the only flag supported for now. + * + * In comparison with **bpf_redirect**\ () helper, + * **bpf_clone_redirect**\ () has the associated cost of + * duplicating the packet buffer, but this can be executed out of + * the eBPF program. Conversely, **bpf_redirect**\ () is more + * efficient, but it is handled through an action code where the + * redirection happens only after the eBPF program has returned. + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * u64 bpf_get_current_pid_tgid(void) + * Return + * A 64-bit integer containing the current tgid and pid, and + * created as such: + * *current_task*\ **->tgid << 32 \|** + * *current_task*\ **->pid**. + * + * u64 bpf_get_current_uid_gid(void) + * Return + * A 64-bit integer containing the current GID and UID, and + * created as such: *current_gid* **<< 32 \|** *current_uid*. + * + * int bpf_get_current_comm(void *buf, u32 size_of_buf) + * Description + * Copy the **comm** attribute of the current task into *buf* of + * *size_of_buf*. The **comm** attribute contains the name of + * the executable (excluding the path) for the current task. The + * *size_of_buf* must be strictly positive. On success, the + * helper makes sure that the *buf* is NUL-terminated. On failure, + * it is filled with zeroes. + * Return + * 0 on success, or a negative error in case of failure. + * + * u32 bpf_get_cgroup_classid(struct sk_buff *skb) + * Description + * Retrieve the classid for the current task, i.e. for the net_cls + * cgroup to which *skb* belongs. + * + * This helper can be used on TC egress path, but not on ingress. + * + * The net_cls cgroup provides an interface to tag network packets + * based on a user-provided identifier for all traffic coming from + * the tasks belonging to the related cgroup. See also the related + * kernel documentation, available from the Linux sources in file + * *Documentation/admin-guide/cgroup-v1/net_cls.rst*. + * + * The Linux kernel has two versions for cgroups: there are + * cgroups v1 and cgroups v2. Both are available to users, who can + * use a mixture of them, but note that the net_cls cgroup is for + * cgroup v1 only. This makes it incompatible with BPF programs + * run on cgroups, which is a cgroup-v2-only feature (a socket can + * only hold data for one version of cgroups at a time). + * + * This helper is only available is the kernel was compiled with + * the **CONFIG_CGROUP_NET_CLASSID** configuration option set to + * "**y**" or to "**m**". + * Return + * The classid, or 0 for the default unconfigured classid. + * + * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) + * Description + * Push a *vlan_tci* (VLAN tag control information) of protocol + * *vlan_proto* to the packet associated to *skb*, then update + * the checksum. Note that if *vlan_proto* is different from + * **ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to + * be **ETH_P_8021Q**. + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_vlan_pop(struct sk_buff *skb) + * Description + * Pop a VLAN header from the packet associated to *skb*. + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) + * Description + * Get tunnel metadata. This helper takes a pointer *key* to an + * empty **struct bpf_tunnel_key** of **size**, that will be + * filled with tunnel metadata for the packet associated to *skb*. + * The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which + * indicates that the tunnel is based on IPv6 protocol instead of + * IPv4. + * + * The **struct bpf_tunnel_key** is an object that generalizes the + * principal parameters used by various tunneling protocols into a + * single struct. This way, it can be used to easily make a + * decision based on the contents of the encapsulation header, + * "summarized" in this struct. In particular, it holds the IP + * address of the remote end (IPv4 or IPv6, depending on the case) + * in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also, + * this struct exposes the *key*\ **->tunnel_id**, which is + * generally mapped to a VNI (Virtual Network Identifier), making + * it programmable together with the **bpf_skb_set_tunnel_key**\ + * () helper. + * + * Let's imagine that the following code is part of a program + * attached to the TC ingress interface, on one end of a GRE + * tunnel, and is supposed to filter out all messages coming from + * remote ends with IPv4 address other than 10.0.0.1: + * + * :: + * + * int ret; + * struct bpf_tunnel_key key = {}; + * + * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); + * if (ret < 0) + * return TC_ACT_SHOT; // drop packet + * + * if (key.remote_ipv4 != 0x0a000001) + * return TC_ACT_SHOT; // drop packet + * + * return TC_ACT_OK; // accept packet + * + * This interface can also be used with all encapsulation devices + * that can operate in "collect metadata" mode: instead of having + * one network device per specific configuration, the "collect + * metadata" mode only requires a single device where the + * configuration can be extracted from this helper. + * + * This can be used together with various tunnels such as VXLan, + * Geneve, GRE or IP in IP (IPIP). + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) + * Description + * Populate tunnel metadata for packet associated to *skb.* The + * tunnel metadata is set to the contents of *key*, of *size*. The + * *flags* can be set to a combination of the following values: + * + * **BPF_F_TUNINFO_IPV6** + * Indicate that the tunnel is based on IPv6 protocol + * instead of IPv4. + * **BPF_F_ZERO_CSUM_TX** + * For IPv4 packets, add a flag to tunnel metadata + * indicating that checksum computation should be skipped + * and checksum set to zeroes. + * **BPF_F_DONT_FRAGMENT** + * Add a flag to tunnel metadata indicating that the + * packet should not be fragmented. + * **BPF_F_SEQ_NUMBER** + * Add a flag to tunnel metadata indicating that a + * sequence number should be added to tunnel header before + * sending the packet. This flag was added for GRE + * encapsulation, but might be used with other protocols + * as well in the future. + * + * Here is a typical usage on the transmit path: + * + * :: + * + * struct bpf_tunnel_key key; + * populate key ... + * bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0); + * bpf_clone_redirect(skb, vxlan_dev_ifindex, 0); + * + * See also the description of the **bpf_skb_get_tunnel_key**\ () + * helper for additional information. + * Return + * 0 on success, or a negative error in case of failure. + * + * u64 bpf_perf_event_read(struct bpf_map *map, u64 flags) + * Description + * Read the value of a perf event counter. This helper relies on a + * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of + * the perf event counter is selected when *map* is updated with + * perf event file descriptors. The *map* is an array whose size + * is the number of available CPUs, and each cell contains a value + * relative to one CPU. The value to retrieve is indicated by + * *flags*, that contains the index of the CPU to look up, masked + * with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to + * **BPF_F_CURRENT_CPU** to indicate that the value for the + * current CPU should be retrieved. + * + * Note that before Linux 4.13, only hardware perf event can be + * retrieved. + * + * Also, be aware that the newer helper + * **bpf_perf_event_read_value**\ () is recommended over + * **bpf_perf_event_read**\ () in general. The latter has some ABI + * quirks where error and counter value are used as a return code + * (which is wrong to do since ranges may overlap). This issue is + * fixed with **bpf_perf_event_read_value**\ (), which at the same + * time provides more features over the **bpf_perf_event_read**\ + * () interface. Please refer to the description of + * **bpf_perf_event_read_value**\ () for details. + * Return + * The value of the perf event counter read from the map, or a + * negative error code in case of failure. + * + * int bpf_redirect(u32 ifindex, u64 flags) + * Description + * Redirect the packet to another net device of index *ifindex*. + * This helper is somewhat similar to **bpf_clone_redirect**\ + * (), except that the packet is not cloned, which provides + * increased performance. + * + * Except for XDP, both ingress and egress interfaces can be used + * for redirection. The **BPF_F_INGRESS** value in *flags* is used + * to make the distinction (ingress path is selected if the flag + * is present, egress path otherwise). Currently, XDP only + * supports redirection to the egress interface, and accepts no + * flag at all. + * + * The same effect can be attained with the more generic + * **bpf_redirect_map**\ (), which requires specific maps to be + * used but offers better performance. + * Return + * For XDP, the helper returns **XDP_REDIRECT** on success or + * **XDP_ABORTED** on error. For other program types, the values + * are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on + * error. + * + * u32 bpf_get_route_realm(struct sk_buff *skb) + * Description + * Retrieve the realm or the route, that is to say the + * **tclassid** field of the destination for the *skb*. The + * indentifier retrieved is a user-provided tag, similar to the + * one used with the net_cls cgroup (see description for + * **bpf_get_cgroup_classid**\ () helper), but here this tag is + * held by a route (a destination entry), not by a task. + * + * Retrieving this identifier works with the clsact TC egress hook + * (see also **tc-bpf(8)**), or alternatively on conventional + * classful egress qdiscs, but not on TC ingress path. In case of + * clsact TC egress hook, this has the advantage that, internally, + * the destination entry has not been dropped yet in the transmit + * path. Therefore, the destination entry does not need to be + * artificially held via **netif_keep_dst**\ () for a classful + * qdisc until the *skb* is freed. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_IP_ROUTE_CLASSID** configuration option. + * Return + * The realm of the route for the packet associated to *skb*, or 0 + * if none was found. + * + * int bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * Description + * Write raw *data* blob into a special BPF perf event held by + * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf + * event must have the following attributes: **PERF_SAMPLE_RAW** + * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and + * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. + * + * The *flags* are used to indicate the index in *map* for which + * the value must be put, masked with **BPF_F_INDEX_MASK**. + * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** + * to indicate that the index of the current CPU core should be + * used. + * + * The value to write, of *size*, is passed through eBPF stack and + * pointed by *data*. + * + * The context of the program *ctx* needs also be passed to the + * helper. + * + * On user space, a program willing to read the values needs to + * call **perf_event_open**\ () on the perf event (either for + * one or for all CPUs) and to store the file descriptor into the + * *map*. This must be done before the eBPF program can send data + * into it. An example is available in file + * *samples/bpf/trace_output_user.c* in the Linux kernel source + * tree (the eBPF program counterpart is in + * *samples/bpf/trace_output_kern.c*). + * + * **bpf_perf_event_output**\ () achieves better performance + * than **bpf_trace_printk**\ () for sharing data with user + * space, and is much better suitable for streaming data from eBPF + * programs. + * + * Note that this helper is not restricted to tracing use cases + * and can be used with programs attached to TC or XDP as well, + * where it allows for passing data to user space listeners. Data + * can be: + * + * * Only custom structs, + * * Only the packet payload, or + * * A combination of both. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len) + * Description + * This helper was provided as an easy way to load data from a + * packet. It can be used to load *len* bytes from *offset* from + * the packet associated to *skb*, into the buffer pointed by + * *to*. + * + * Since Linux 4.7, usage of this helper has mostly been replaced + * by "direct packet access", enabling packet data to be + * manipulated with *skb*\ **->data** and *skb*\ **->data_end** + * pointing respectively to the first byte of packet data and to + * the byte after the last byte of packet data. However, it + * remains useful if one wishes to read large quantities of data + * at once from a packet into the eBPF stack. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags) + * Description + * Walk a user or a kernel stack and return its id. To achieve + * this, the helper needs *ctx*, which is a pointer to the context + * on which the tracing program is executed, and a pointer to a + * *map* of type **BPF_MAP_TYPE_STACK_TRACE**. + * + * The last argument, *flags*, holds the number of stack frames to + * skip (from 0 to 255), masked with + * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set + * a combination of the following flags: + * + * **BPF_F_USER_STACK** + * Collect a user space stack instead of a kernel stack. + * **BPF_F_FAST_STACK_CMP** + * Compare stacks by hash only. + * **BPF_F_REUSE_STACKID** + * If two different stacks hash into the same *stackid*, + * discard the old one. + * + * The stack id retrieved is a 32 bit long integer handle which + * can be further combined with other data (including other stack + * ids) and used as a key into maps. This can be useful for + * generating a variety of graphs (such as flame graphs or off-cpu + * graphs). + * + * For walking a stack, this helper is an improvement over + * **bpf_probe_read**\ (), which can be used with unrolled loops + * but is not efficient and consumes a lot of eBPF instructions. + * Instead, **bpf_get_stackid**\ () can collect up to + * **PERF_MAX_STACK_DEPTH** both kernel and user frames. Note that + * this limit can be controlled with the **sysctl** program, and + * that it should be manually increased in order to profile long + * user stacks (such as stacks for Java programs). To do so, use: + * + * :: + * + * # sysctl kernel.perf_event_max_stack=<new value> + * Return + * The positive or null stack id on success, or a negative error + * in case of failure. + * + * s64 bpf_csum_diff(__be32 *from, u32 from_size, __be32 *to, u32 to_size, __wsum seed) + * Description + * Compute a checksum difference, from the raw buffer pointed by + * *from*, of length *from_size* (that must be a multiple of 4), + * towards the raw buffer pointed by *to*, of size *to_size* + * (same remark). An optional *seed* can be added to the value + * (this can be cascaded, the seed may come from a previous call + * to the helper). + * + * This is flexible enough to be used in several ways: + * + * * With *from_size* == 0, *to_size* > 0 and *seed* set to + * checksum, it can be used when pushing new data. + * * With *from_size* > 0, *to_size* == 0 and *seed* set to + * checksum, it can be used when removing data from a packet. + * * With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it + * can be used to compute a diff. Note that *from_size* and + * *to_size* do not need to be equal. + * + * This helper can be used in combination with + * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to + * which one can feed in the difference computed with + * **bpf_csum_diff**\ (). + * Return + * The checksum result, or a negative error code in case of + * failure. + * + * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) + * Description + * Retrieve tunnel options metadata for the packet associated to + * *skb*, and store the raw tunnel option data to the buffer *opt* + * of *size*. + * + * This helper can be used with encapsulation devices that can + * operate in "collect metadata" mode (please refer to the related + * note in the description of **bpf_skb_get_tunnel_key**\ () for + * more details). A particular example where this can be used is + * in combination with the Geneve encapsulation protocol, where it + * allows for pushing (with **bpf_skb_get_tunnel_opt**\ () helper) + * and retrieving arbitrary TLVs (Type-Length-Value headers) from + * the eBPF program. This allows for full customization of these + * headers. + * Return + * The size of the option data retrieved. + * + * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) + * Description + * Set tunnel options metadata for the packet associated to *skb* + * to the option data contained in the raw buffer *opt* of *size*. + * + * See also the description of the **bpf_skb_get_tunnel_opt**\ () + * helper for additional information. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags) + * Description + * Change the protocol of the *skb* to *proto*. Currently + * supported are transition from IPv4 to IPv6, and from IPv6 to + * IPv4. The helper takes care of the groundwork for the + * transition, including resizing the socket buffer. The eBPF + * program is expected to fill the new headers, if any, via + * **skb_store_bytes**\ () and to recompute the checksums with + * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ + * (). The main case for this helper is to perform NAT64 + * operations out of an eBPF program. + * + * Internally, the GSO type is marked as dodgy so that headers are + * checked and segments are recalculated by the GSO/GRO engine. + * The size for GSO target is adapted as well. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_change_type(struct sk_buff *skb, u32 type) + * Description + * Change the packet type for the packet associated to *skb*. This + * comes down to setting *skb*\ **->pkt_type** to *type*, except + * the eBPF program does not have a write access to *skb*\ + * **->pkt_type** beside this helper. Using a helper here allows + * for graceful handling of errors. + * + * The major use case is to change incoming *skb*s to + * **PACKET_HOST** in a programmatic way instead of having to + * recirculate via **redirect**\ (..., **BPF_F_INGRESS**), for + * example. + * + * Note that *type* only allows certain values. At this time, they + * are: + * + * **PACKET_HOST** + * Packet is for us. + * **PACKET_BROADCAST** + * Send packet to all. + * **PACKET_MULTICAST** + * Send packet to group. + * **PACKET_OTHERHOST** + * Send packet to someone else. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index) + * Description + * Check whether *skb* is a descendant of the cgroup2 held by + * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. + * Return + * The return value depends on the result of the test, and can be: + * + * * 0, if the *skb* failed the cgroup2 descendant test. + * * 1, if the *skb* succeeded the cgroup2 descendant test. + * * A negative error code, if an error occurred. + * + * u32 bpf_get_hash_recalc(struct sk_buff *skb) + * Description + * Retrieve the hash of the packet, *skb*\ **->hash**. If it is + * not set, in particular if the hash was cleared due to mangling, + * recompute this hash. Later accesses to the hash can be done + * directly with *skb*\ **->hash**. + * + * Calling **bpf_set_hash_invalid**\ (), changing a packet + * prototype with **bpf_skb_change_proto**\ (), or calling + * **bpf_skb_store_bytes**\ () with the + * **BPF_F_INVALIDATE_HASH** are actions susceptible to clear + * the hash and to trigger a new computation for the next call to + * **bpf_get_hash_recalc**\ (). + * Return + * The 32-bit hash. + * + * u64 bpf_get_current_task(void) + * Return + * A pointer to the current task struct. + * + * int bpf_probe_write_user(void *dst, const void *src, u32 len) + * Description + * Attempt in a safe way to write *len* bytes from the buffer + * *src* to *dst* in memory. It only works for threads that are in + * user context, and *dst* must be a valid user space address. + * + * This helper should not be used to implement any kind of + * security mechanism because of TOC-TOU attacks, but rather to + * debug, divert, and manipulate execution of semi-cooperative + * processes. + * + * Keep in mind that this feature is meant for experiments, and it + * has a risk of crashing the system and running programs. + * Therefore, when an eBPF program using this helper is attached, + * a warning including PID and process name is printed to kernel + * logs. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index) + * Description + * Check whether the probe is being run is the context of a given + * subset of the cgroup2 hierarchy. The cgroup2 to test is held by + * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. + * Return + * The return value depends on the result of the test, and can be: + * + * * 0, if the *skb* task belongs to the cgroup2. + * * 1, if the *skb* task does not belong to the cgroup2. + * * A negative error code, if an error occurred. + * + * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) + * Description + * Resize (trim or grow) the packet associated to *skb* to the + * new *len*. The *flags* are reserved for future usage, and must + * be left at zero. + * + * The basic idea is that the helper performs the needed work to + * change the size of the packet, then the eBPF program rewrites + * the rest via helpers like **bpf_skb_store_bytes**\ (), + * **bpf_l3_csum_replace**\ (), **bpf_l3_csum_replace**\ () + * and others. This helper is a slow path utility intended for + * replies with control messages. And because it is targeted for + * slow path, the helper itself can afford to be slow: it + * implicitly linearizes, unclones and drops offloads from the + * *skb*. + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_pull_data(struct sk_buff *skb, u32 len) + * Description + * Pull in non-linear data in case the *skb* is non-linear and not + * all of *len* are part of the linear section. Make *len* bytes + * from *skb* readable and writable. If a zero value is passed for + * *len*, then the whole length of the *skb* is pulled. + * + * This helper is only needed for reading and writing with direct + * packet access. + * + * For direct packet access, testing that offsets to access + * are within packet boundaries (test on *skb*\ **->data_end**) is + * susceptible to fail if offsets are invalid, or if the requested + * data is in non-linear parts of the *skb*. On failure the + * program can just bail out, or in the case of a non-linear + * buffer, use a helper to make the data available. The + * **bpf_skb_load_bytes**\ () helper is a first solution to access + * the data. Another one consists in using **bpf_skb_pull_data** + * to pull in once the non-linear parts, then retesting and + * eventually access the data. + * + * At the same time, this also makes sure the *skb* is uncloned, + * which is a necessary condition for direct write. As this needs + * to be an invariant for the write part only, the verifier + * detects writes and adds a prologue that is calling + * **bpf_skb_pull_data()** to effectively unclone the *skb* from + * the very beginning in case it is indeed cloned. + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * s64 bpf_csum_update(struct sk_buff *skb, __wsum csum) + * Description + * Add the checksum *csum* into *skb*\ **->csum** in case the + * driver has supplied a checksum for the entire packet into that + * field. Return an error otherwise. This helper is intended to be + * used in combination with **bpf_csum_diff**\ (), in particular + * when the checksum needs to be updated after data has been + * written into the packet through direct packet access. + * Return + * The checksum on success, or a negative error code in case of + * failure. + * + * void bpf_set_hash_invalid(struct sk_buff *skb) + * Description + * Invalidate the current *skb*\ **->hash**. It can be used after + * mangling on headers through direct packet access, in order to + * indicate that the hash is outdated and to trigger a + * recalculation the next time the kernel tries to access this + * hash or when the **bpf_get_hash_recalc**\ () helper is called. + * + * int bpf_get_numa_node_id(void) + * Description + * Return the id of the current NUMA node. The primary use case + * for this helper is the selection of sockets for the local NUMA + * node, when the program is attached to sockets using the + * **SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**), + * but the helper is also available to other eBPF program types, + * similarly to **bpf_get_smp_processor_id**\ (). + * Return + * The id of current NUMA node. + * + * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags) + * Description + * Grows headroom of packet associated to *skb* and adjusts the + * offset of the MAC header accordingly, adding *len* bytes of + * space. It automatically extends and reallocates memory as + * required. + * + * This helper can be used on a layer 3 *skb* to push a MAC header + * for redirection into a layer 2 device. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta) + * Description + * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that + * it is possible to use a negative value for *delta*. This helper + * can be used to prepare the packet for pushing or popping + * headers. + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr) + * Description + * Copy a NUL terminated string from an unsafe kernel address + * *unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for + * more details. + * + * Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str() + * instead. + * Return + * On success, the strictly positive length of the string, + * including the trailing NUL character. On error, a negative + * value. + * + * u64 bpf_get_socket_cookie(struct sk_buff *skb) + * Description + * If the **struct sk_buff** pointed by *skb* has a known socket, + * retrieve the cookie (generated by the kernel) of this socket. + * If no cookie has been set yet, generate a new cookie. Once + * generated, the socket cookie remains stable for the life of the + * socket. This helper can be useful for monitoring per socket + * networking traffic statistics as it provides a global socket + * identifier that can be assumed unique. + * Return + * A 8-byte long non-decreasing number on success, or 0 if the + * socket field is missing inside *skb*. + * + * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx) + * Description + * Equivalent to bpf_get_socket_cookie() helper that accepts + * *skb*, but gets socket from **struct bpf_sock_addr** context. + * Return + * A 8-byte long non-decreasing number. + * + * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx) + * Description + * Equivalent to bpf_get_socket_cookie() helper that accepts + * *skb*, but gets socket from **struct bpf_sock_ops** context. + * Return + * A 8-byte long non-decreasing number. + * + * u32 bpf_get_socket_uid(struct sk_buff *skb) + * Return + * The owner UID of the socket associated to *skb*. If the socket + * is **NULL**, or if it is not a full socket (i.e. if it is a + * time-wait or a request socket instead), **overflowuid** value + * is returned (note that **overflowuid** might also be the actual + * UID value for the socket). + * + * u32 bpf_set_hash(struct sk_buff *skb, u32 hash) + * Description + * Set the full hash for *skb* (set the field *skb*\ **->hash**) + * to value *hash*. + * Return + * 0 + * + * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen) + * Description + * Emulate a call to **setsockopt()** on the socket associated to + * *bpf_socket*, which must be a full socket. The *level* at + * which the option resides and the name *optname* of the option + * must be specified, see **setsockopt(2)** for more information. + * The option value of length *optlen* is pointed by *optval*. + * + * This helper actually implements a subset of **setsockopt()**. + * It supports the following *level*\ s: + * + * * **SOL_SOCKET**, which supports the following *optname*\ s: + * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**, + * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**. + * * **IPPROTO_TCP**, which supports the following *optname*\ s: + * **TCP_CONGESTION**, **TCP_BPF_IW**, + * **TCP_BPF_SNDCWND_CLAMP**. + * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. + * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) + * Description + * Grow or shrink the room for data in the packet associated to + * *skb* by *len_diff*, and according to the selected *mode*. + * + * There are two supported modes at this time: + * + * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer + * (room space is added or removed below the layer 2 header). + * + * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer + * (room space is added or removed below the layer 3 header). + * + * The following flags are supported at this time: + * + * * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size. + * Adjusting mss in this way is not allowed for datagrams. + * + * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4**, + * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6**: + * Any new space is reserved to hold a tunnel header. + * Configure skb offsets and other fields accordingly. + * + * * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE**, + * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP**: + * Use with ENCAP_L3 flags to further specify the tunnel type. + * + * * **BPF_F_ADJ_ROOM_ENCAP_L2**\ (*len*): + * Use with ENCAP_L3/L4 flags to further specify the tunnel + * type; *len* is the length of the inner MAC header. + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags) + * Description + * Redirect the packet to the endpoint referenced by *map* at + * index *key*. Depending on its type, this *map* can contain + * references to net devices (for forwarding packets through other + * ports), or to CPUs (for redirecting XDP frames to another CPU; + * but this is only implemented for native XDP (with driver + * support) as of this writing). + * + * The lower two bits of *flags* are used as the return code if + * the map lookup fails. This is so that the return value can be + * one of the XDP program return codes up to XDP_TX, as chosen by + * the caller. Any higher bits in the *flags* argument must be + * unset. + * + * When used to redirect packets to net devices, this helper + * provides a high performance increase over **bpf_redirect**\ (). + * This is due to various implementation details of the underlying + * mechanisms, one of which is the fact that **bpf_redirect_map**\ + * () tries to send packet as a "bulk" to the device. + * Return + * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error. + * + * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags) + * Description + * Redirect the packet to the socket referenced by *map* (of type + * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and + * egress interfaces can be used for redirection. The + * **BPF_F_INGRESS** value in *flags* is used to make the + * distinction (ingress path is selected if the flag is present, + * egress path otherwise). This is the only flag supported for now. + * Return + * **SK_PASS** on success, or **SK_DROP** on error. + * + * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) + * Description + * Add an entry to, or update a *map* referencing sockets. The + * *skops* is used as a new value for the entry associated to + * *key*. *flags* is one of: + * + * **BPF_NOEXIST** + * The entry for *key* must not exist in the map. + * **BPF_EXIST** + * The entry for *key* must already exist in the map. + * **BPF_ANY** + * No condition on the existence of the entry for *key*. + * + * If the *map* has eBPF programs (parser and verdict), those will + * be inherited by the socket being added. If the socket is + * already attached to eBPF programs, this results in an error. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta) + * Description + * Adjust the address pointed by *xdp_md*\ **->data_meta** by + * *delta* (which can be positive or negative). Note that this + * operation modifies the address stored in *xdp_md*\ **->data**, + * so the latter must be loaded only after the helper has been + * called. + * + * The use of *xdp_md*\ **->data_meta** is optional and programs + * are not required to use it. The rationale is that when the + * packet is processed with XDP (e.g. as DoS filter), it is + * possible to push further meta data along with it before passing + * to the stack, and to give the guarantee that an ingress eBPF + * program attached as a TC classifier on the same device can pick + * this up for further post-processing. Since TC works with socket + * buffers, it remains possible to set from XDP the **mark** or + * **priority** pointers, or other pointers for the socket buffer. + * Having this scratch space generic and programmable allows for + * more flexibility as the user is free to store whatever meta + * data they need. + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size) + * Description + * Read the value of a perf event counter, and store it into *buf* + * of size *buf_size*. This helper relies on a *map* of type + * **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event + * counter is selected when *map* is updated with perf event file + * descriptors. The *map* is an array whose size is the number of + * available CPUs, and each cell contains a value relative to one + * CPU. The value to retrieve is indicated by *flags*, that + * contains the index of the CPU to look up, masked with + * **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to + * **BPF_F_CURRENT_CPU** to indicate that the value for the + * current CPU should be retrieved. + * + * This helper behaves in a way close to + * **bpf_perf_event_read**\ () helper, save that instead of + * just returning the value observed, it fills the *buf* + * structure. This allows for additional data to be retrieved: in + * particular, the enabled and running times (in *buf*\ + * **->enabled** and *buf*\ **->running**, respectively) are + * copied. In general, **bpf_perf_event_read_value**\ () is + * recommended over **bpf_perf_event_read**\ (), which has some + * ABI issues and provides fewer functionalities. + * + * These values are interesting, because hardware PMU (Performance + * Monitoring Unit) counters are limited resources. When there are + * more PMU based perf events opened than available counters, + * kernel will multiplex these events so each event gets certain + * percentage (but not all) of the PMU time. In case that + * multiplexing happens, the number of samples or counter value + * will not reflect the case compared to when no multiplexing + * occurs. This makes comparison between different runs difficult. + * Typically, the counter value should be normalized before + * comparing to other experiments. The usual normalization is done + * as follows. + * + * :: + * + * normalized_counter = counter * t_enabled / t_running + * + * Where t_enabled is the time enabled for event and t_running is + * the time running for event since last normalization. The + * enabled and running times are accumulated since the perf event + * open. To achieve scaling factor between two invocations of an + * eBPF program, users can can use CPU id as the key (which is + * typical for perf array usage model) to remember the previous + * value and do the calculation inside the eBPF program. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size) + * Description + * For en eBPF program attached to a perf event, retrieve the + * value of the event counter associated to *ctx* and store it in + * the structure pointed by *buf* and of size *buf_size*. Enabled + * and running times are also stored in the structure (see + * description of helper **bpf_perf_event_read_value**\ () for + * more details). + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen) + * Description + * Emulate a call to **getsockopt()** on the socket associated to + * *bpf_socket*, which must be a full socket. The *level* at + * which the option resides and the name *optname* of the option + * must be specified, see **getsockopt(2)** for more information. + * The retrieved value is stored in the structure pointed by + * *opval* and of length *optlen*. + * + * This helper actually implements a subset of **getsockopt()**. + * It supports the following *level*\ s: + * + * * **IPPROTO_TCP**, which supports *optname* + * **TCP_CONGESTION**. + * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. + * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_override_return(struct pt_regs *regs, u64 rc) + * Description + * Used for error injection, this helper uses kprobes to override + * the return value of the probed function, and to set it to *rc*. + * The first argument is the context *regs* on which the kprobe + * works. + * + * This helper works by setting setting the PC (program counter) + * to an override function which is run in place of the original + * probed function. This means the probed function is not run at + * all. The replacement function just returns with the required + * value. + * + * This helper has security implications, and thus is subject to + * restrictions. It is only available if the kernel was compiled + * with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration + * option, and in this case it only works on functions tagged with + * **ALLOW_ERROR_INJECTION** in the kernel code. + * + * Also, the helper is only available for the architectures having + * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing, + * x86 architecture is the only one to support this feature. + * Return + * 0 + * + * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval) + * Description + * Attempt to set the value of the **bpf_sock_ops_cb_flags** field + * for the full TCP socket associated to *bpf_sock_ops* to + * *argval*. + * + * The primary use of this field is to determine if there should + * be calls to eBPF programs of type + * **BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP + * code. A program of the same type can change its value, per + * connection and as necessary, when the connection is + * established. This field is directly accessible for reading, but + * this helper must be used for updates in order to return an + * error if an eBPF program tries to set a callback that is not + * supported in the current kernel. + * + * *argval* is a flag array which can combine these flags: + * + * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out) + * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission) + * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change) + * * **BPF_SOCK_OPS_RTT_CB_FLAG** (every RTT) + * + * Therefore, this function can be used to clear a callback flag by + * setting the appropriate bit to zero. e.g. to disable the RTO + * callback: + * + * **bpf_sock_ops_cb_flags_set(bpf_sock,** + * **bpf_sock->bpf_sock_ops_cb_flags & ~BPF_SOCK_OPS_RTO_CB_FLAG)** + * + * Here are some examples of where one could call such eBPF + * program: + * + * * When RTO fires. + * * When a packet is retransmitted. + * * When the connection terminates. + * * When a packet is sent. + * * When a packet is received. + * Return + * Code **-EINVAL** if the socket is not a full TCP socket; + * otherwise, a positive number containing the bits that could not + * be set is returned (which comes down to 0 if all bits were set + * as required). + * + * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags) + * Description + * This helper is used in programs implementing policies at the + * socket level. If the message *msg* is allowed to pass (i.e. if + * the verdict eBPF program returns **SK_PASS**), redirect it to + * the socket referenced by *map* (of type + * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and + * egress interfaces can be used for redirection. The + * **BPF_F_INGRESS** value in *flags* is used to make the + * distinction (ingress path is selected if the flag is present, + * egress path otherwise). This is the only flag supported for now. + * Return + * **SK_PASS** on success, or **SK_DROP** on error. + * + * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes) + * Description + * For socket policies, apply the verdict of the eBPF program to + * the next *bytes* (number of bytes) of message *msg*. + * + * For example, this helper can be used in the following cases: + * + * * A single **sendmsg**\ () or **sendfile**\ () system call + * contains multiple logical messages that the eBPF program is + * supposed to read and for which it should apply a verdict. + * * An eBPF program only cares to read the first *bytes* of a + * *msg*. If the message has a large payload, then setting up + * and calling the eBPF program repeatedly for all bytes, even + * though the verdict is already known, would create unnecessary + * overhead. + * + * When called from within an eBPF program, the helper sets a + * counter internal to the BPF infrastructure, that is used to + * apply the last verdict to the next *bytes*. If *bytes* is + * smaller than the current data being processed from a + * **sendmsg**\ () or **sendfile**\ () system call, the first + * *bytes* will be sent and the eBPF program will be re-run with + * the pointer for start of data pointing to byte number *bytes* + * **+ 1**. If *bytes* is larger than the current data being + * processed, then the eBPF verdict will be applied to multiple + * **sendmsg**\ () or **sendfile**\ () calls until *bytes* are + * consumed. + * + * Note that if a socket closes with the internal counter holding + * a non-zero value, this is not a problem because data is not + * being buffered for *bytes* and is sent as it is received. + * Return + * 0 + * + * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes) + * Description + * For socket policies, prevent the execution of the verdict eBPF + * program for message *msg* until *bytes* (byte number) have been + * accumulated. + * + * This can be used when one needs a specific number of bytes + * before a verdict can be assigned, even if the data spans + * multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme + * case would be a user calling **sendmsg**\ () repeatedly with + * 1-byte long message segments. Obviously, this is bad for + * performance, but it is still valid. If the eBPF program needs + * *bytes* bytes to validate a header, this helper can be used to + * prevent the eBPF program to be called again until *bytes* have + * been accumulated. + * Return + * 0 + * + * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags) + * Description + * For socket policies, pull in non-linear data from user space + * for *msg* and set pointers *msg*\ **->data** and *msg*\ + * **->data_end** to *start* and *end* bytes offsets into *msg*, + * respectively. + * + * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a + * *msg* it can only parse data that the (**data**, **data_end**) + * pointers have already consumed. For **sendmsg**\ () hooks this + * is likely the first scatterlist element. But for calls relying + * on the **sendpage** handler (e.g. **sendfile**\ ()) this will + * be the range (**0**, **0**) because the data is shared with + * user space and by default the objective is to avoid allowing + * user space to modify data while (or after) eBPF verdict is + * being decided. This helper can be used to pull in data and to + * set the start and end pointer to given values. Data will be + * copied if necessary (i.e. if data was not linear and if start + * and end pointers do not point to the same chunk). + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) + * Description + * Bind the socket associated to *ctx* to the address pointed by + * *addr*, of length *addr_len*. This allows for making outgoing + * connection from the desired IP address, which can be useful for + * example when all processes inside a cgroup should use one + * single IP address on a host that has multiple IP configured. + * + * This helper works for IPv4 and IPv6, TCP and UDP sockets. The + * domain (*addr*\ **->sa_family**) must be **AF_INET** (or + * **AF_INET6**). Looking for a free port to bind to can be + * expensive, therefore binding to port is not permitted by the + * helper: *addr*\ **->sin_port** (or **sin6_port**, respectively) + * must be set to zero. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta) + * Description + * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is + * only possible to shrink the packet as of this writing, + * therefore *delta* must be a negative integer. + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags) + * Description + * Retrieve the XFRM state (IP transform framework, see also + * **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*. + * + * The retrieved value is stored in the **struct bpf_xfrm_state** + * pointed by *xfrm_state* and of length *size*. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_XFRM** configuration option. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags) + * Description + * Return a user or a kernel stack in bpf program provided buffer. + * To achieve this, the helper needs *ctx*, which is a pointer + * to the context on which the tracing program is executed. + * To store the stacktrace, the bpf program provides *buf* with + * a nonnegative *size*. + * + * The last argument, *flags*, holds the number of stack frames to + * skip (from 0 to 255), masked with + * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set + * the following flags: + * + * **BPF_F_USER_STACK** + * Collect a user space stack instead of a kernel stack. + * **BPF_F_USER_BUILD_ID** + * Collect buildid+offset instead of ips for user stack, + * only valid if **BPF_F_USER_STACK** is also specified. + * + * **bpf_get_stack**\ () can collect up to + * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject + * to sufficient large buffer size. Note that + * this limit can be controlled with the **sysctl** program, and + * that it should be manually increased in order to profile long + * user stacks (such as stacks for Java programs). To do so, use: + * + * :: + * + * # sysctl kernel.perf_event_max_stack=<new value> + * Return + * A non-negative value equal to or less than *size* on success, + * or a negative error in case of failure. + * + * int bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header) + * Description + * This helper is similar to **bpf_skb_load_bytes**\ () in that + * it provides an easy way to load *len* bytes from *offset* + * from the packet associated to *skb*, into the buffer pointed + * by *to*. The difference to **bpf_skb_load_bytes**\ () is that + * a fifth argument *start_header* exists in order to select a + * base offset to start from. *start_header* can be one of: + * + * **BPF_HDR_START_MAC** + * Base offset to load data from is *skb*'s mac header. + * **BPF_HDR_START_NET** + * Base offset to load data from is *skb*'s network header. + * + * In general, "direct packet access" is the preferred method to + * access packet data, however, this helper is in particular useful + * in socket filters where *skb*\ **->data** does not always point + * to the start of the mac header and where "direct packet access" + * is not available. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags) + * Description + * Do FIB lookup in kernel tables using parameters in *params*. + * If lookup is successful and result shows packet is to be + * forwarded, the neighbor tables are searched for the nexthop. + * If successful (ie., FIB lookup shows forwarding and nexthop + * is resolved), the nexthop address is returned in ipv4_dst + * or ipv6_dst based on family, smac is set to mac address of + * egress device, dmac is set to nexthop mac address, rt_metric + * is set to metric from route (IPv4/IPv6 only), and ifindex + * is set to the device index of the nexthop from the FIB lookup. + * + * *plen* argument is the size of the passed in struct. + * *flags* argument can be a combination of one or more of the + * following values: + * + * **BPF_FIB_LOOKUP_DIRECT** + * Do a direct table lookup vs full lookup using FIB + * rules. + * **BPF_FIB_LOOKUP_OUTPUT** + * Perform lookup from an egress perspective (default is + * ingress). + * + * *ctx* is either **struct xdp_md** for XDP programs or + * **struct sk_buff** tc cls_act programs. + * Return + * * < 0 if any input argument is invalid + * * 0 on success (packet is forwarded, nexthop neighbor exists) + * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the + * packet is not forwarded or needs assist from full stack + * + * int bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) + * Description + * Add an entry to, or update a sockhash *map* referencing sockets. + * The *skops* is used as a new value for the entry associated to + * *key*. *flags* is one of: + * + * **BPF_NOEXIST** + * The entry for *key* must not exist in the map. + * **BPF_EXIST** + * The entry for *key* must already exist in the map. + * **BPF_ANY** + * No condition on the existence of the entry for *key*. + * + * If the *map* has eBPF programs (parser and verdict), those will + * be inherited by the socket being added. If the socket is + * already attached to eBPF programs, this results in an error. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags) + * Description + * This helper is used in programs implementing policies at the + * socket level. If the message *msg* is allowed to pass (i.e. if + * the verdict eBPF program returns **SK_PASS**), redirect it to + * the socket referenced by *map* (of type + * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and + * egress interfaces can be used for redirection. The + * **BPF_F_INGRESS** value in *flags* is used to make the + * distinction (ingress path is selected if the flag is present, + * egress path otherwise). This is the only flag supported for now. + * Return + * **SK_PASS** on success, or **SK_DROP** on error. + * + * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags) + * Description + * This helper is used in programs implementing policies at the + * skb socket level. If the sk_buff *skb* is allowed to pass (i.e. + * if the verdeict eBPF program returns **SK_PASS**), redirect it + * to the socket referenced by *map* (of type + * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and + * egress interfaces can be used for redirection. The + * **BPF_F_INGRESS** value in *flags* is used to make the + * distinction (ingress path is selected if the flag is present, + * egress otherwise). This is the only flag supported for now. + * Return + * **SK_PASS** on success, or **SK_DROP** on error. + * + * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) + * Description + * Encapsulate the packet associated to *skb* within a Layer 3 + * protocol header. This header is provided in the buffer at + * address *hdr*, with *len* its size in bytes. *type* indicates + * the protocol of the header and can be one of: + * + * **BPF_LWT_ENCAP_SEG6** + * IPv6 encapsulation with Segment Routing Header + * (**struct ipv6_sr_hdr**). *hdr* only contains the SRH, + * the IPv6 header is computed by the kernel. + * **BPF_LWT_ENCAP_SEG6_INLINE** + * Only works if *skb* contains an IPv6 packet. Insert a + * Segment Routing Header (**struct ipv6_sr_hdr**) inside + * the IPv6 header. + * **BPF_LWT_ENCAP_IP** + * IP encapsulation (GRE/GUE/IPIP/etc). The outer header + * must be IPv4 or IPv6, followed by zero or more + * additional headers, up to **LWT_BPF_MAX_HEADROOM** + * total bytes in all prepended headers. Please note that + * if **skb_is_gso**\ (*skb*) is true, no more than two + * headers can be prepended, and the inner header, if + * present, should be either GRE or UDP/GUE. + * + * **BPF_LWT_ENCAP_SEG6**\ \* types can be called by BPF programs + * of type **BPF_PROG_TYPE_LWT_IN**; **BPF_LWT_ENCAP_IP** type can + * be called by bpf programs of types **BPF_PROG_TYPE_LWT_IN** and + * **BPF_PROG_TYPE_LWT_XMIT**. + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len) + * Description + * Store *len* bytes from address *from* into the packet + * associated to *skb*, at *offset*. Only the flags, tag and TLVs + * inside the outermost IPv6 Segment Routing Header can be + * modified through this helper. + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta) + * Description + * Adjust the size allocated to TLVs in the outermost IPv6 + * Segment Routing Header contained in the packet associated to + * *skb*, at position *offset* by *delta* bytes. Only offsets + * after the segments are accepted. *delta* can be as well + * positive (growing) as negative (shrinking). + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len) + * Description + * Apply an IPv6 Segment Routing action of type *action* to the + * packet associated to *skb*. Each action takes a parameter + * contained at address *param*, and of length *param_len* bytes. + * *action* can be one of: + * + * **SEG6_LOCAL_ACTION_END_X** + * End.X action: Endpoint with Layer-3 cross-connect. + * Type of *param*: **struct in6_addr**. + * **SEG6_LOCAL_ACTION_END_T** + * End.T action: Endpoint with specific IPv6 table lookup. + * Type of *param*: **int**. + * **SEG6_LOCAL_ACTION_END_B6** + * End.B6 action: Endpoint bound to an SRv6 policy. + * Type of *param*: **struct ipv6_sr_hdr**. + * **SEG6_LOCAL_ACTION_END_B6_ENCAP** + * End.B6.Encap action: Endpoint bound to an SRv6 + * encapsulation policy. + * Type of *param*: **struct ipv6_sr_hdr**. + * + * A call to this helper is susceptible to change the underlying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_rc_repeat(void *ctx) + * Description + * This helper is used in programs implementing IR decoding, to + * report a successfully decoded repeat key message. This delays + * the generation of a key up event for previously generated + * key down event. + * + * Some IR protocols like NEC have a special IR message for + * repeating last button, for when a button is held down. + * + * The *ctx* should point to the lirc sample as passed into + * the program. + * + * This helper is only available is the kernel was compiled with + * the **CONFIG_BPF_LIRC_MODE2** configuration option set to + * "**y**". + * Return + * 0 + * + * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) + * Description + * This helper is used in programs implementing IR decoding, to + * report a successfully decoded key press with *scancode*, + * *toggle* value in the given *protocol*. The scancode will be + * translated to a keycode using the rc keymap, and reported as + * an input key down event. After a period a key up event is + * generated. This period can be extended by calling either + * **bpf_rc_keydown**\ () again with the same values, or calling + * **bpf_rc_repeat**\ (). + * + * Some protocols include a toggle bit, in case the button was + * released and pressed again between consecutive scancodes. + * + * The *ctx* should point to the lirc sample as passed into + * the program. + * + * The *protocol* is the decoded protocol number (see + * **enum rc_proto** for some predefined values). + * + * This helper is only available is the kernel was compiled with + * the **CONFIG_BPF_LIRC_MODE2** configuration option set to + * "**y**". + * Return + * 0 + * + * u64 bpf_skb_cgroup_id(struct sk_buff *skb) + * Description + * Return the cgroup v2 id of the socket associated with the *skb*. + * This is roughly similar to the **bpf_get_cgroup_classid**\ () + * helper for cgroup v1 by providing a tag resp. identifier that + * can be matched on or used for map lookups e.g. to implement + * policy. The cgroup v2 id of a given path in the hierarchy is + * exposed in user space through the f_handle API in order to get + * to the same 64-bit id. + * + * This helper can be used on TC egress path, but not on ingress, + * and is available only if the kernel was compiled with the + * **CONFIG_SOCK_CGROUP_DATA** configuration option. + * Return + * The id is returned or 0 in case the id could not be retrieved. + * + * u64 bpf_get_current_cgroup_id(void) + * Return + * A 64-bit integer containing the current cgroup id based + * on the cgroup within which the current task is running. + * + * void *bpf_get_local_storage(void *map, u64 flags) + * Description + * Get the pointer to the local storage area. + * The type and the size of the local storage is defined + * by the *map* argument. + * The *flags* meaning is specific for each map type, + * and has to be 0 for cgroup local storage. + * + * Depending on the BPF program type, a local storage area + * can be shared between multiple instances of the BPF program, + * running simultaneously. + * + * A user should care about the synchronization by himself. + * For example, by using the **BPF_STX_XADD** instruction to alter + * the shared data. + * Return + * A pointer to the local storage area. + * + * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) + * Description + * Select a **SO_REUSEPORT** socket from a + * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. + * It checks the selected socket is matching the incoming + * request in the socket buffer. + * Return + * 0 on success, or a negative error in case of failure. + * + * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level) + * Description + * Return id of cgroup v2 that is ancestor of cgroup associated + * with the *skb* at the *ancestor_level*. The root cgroup is at + * *ancestor_level* zero and each step down the hierarchy + * increments the level. If *ancestor_level* == level of cgroup + * associated with *skb*, then return value will be same as that + * of **bpf_skb_cgroup_id**\ (). + * + * The helper is useful to implement policies based on cgroups + * that are upper in hierarchy than immediate cgroup associated + * with *skb*. + * + * The format of returned id and helper limitations are same as in + * **bpf_skb_cgroup_id**\ (). + * Return + * The id is returned or 0 in case the id could not be retrieved. + * + * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) + * Description + * Look for TCP socket matching *tuple*, optionally in a child + * network namespace *netns*. The return value must be checked, + * and if non-**NULL**, released via **bpf_sk_release**\ (). + * + * The *ctx* should point to the context of the program, such as + * the skb or socket (depending on the hook in use). This is used + * to determine the base network namespace for the lookup. + * + * *tuple_size* must be one of: + * + * **sizeof**\ (*tuple*\ **->ipv4**) + * Look for an IPv4 socket. + * **sizeof**\ (*tuple*\ **->ipv6**) + * Look for an IPv6 socket. + * + * If the *netns* is a negative signed 32-bit integer, then the + * socket lookup table in the netns associated with the *ctx* will + * will be used. For the TC hooks, this is the netns of the device + * in the skb. For socket hooks, this is the netns of the socket. + * If *netns* is any other signed 32-bit value greater than or + * equal to zero then it specifies the ID of the netns relative to + * the netns associated with the *ctx*. *netns* values beyond the + * range of 32-bit integers are reserved for future use. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_NET** configuration option. + * Return + * Pointer to **struct bpf_sock**, or **NULL** in case of failure. + * For sockets with reuseport option, the **struct bpf_sock** + * result is from *reuse*\ **->socks**\ [] using the hash of the + * tuple. + * + * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) + * Description + * Look for UDP socket matching *tuple*, optionally in a child + * network namespace *netns*. The return value must be checked, + * and if non-**NULL**, released via **bpf_sk_release**\ (). + * + * The *ctx* should point to the context of the program, such as + * the skb or socket (depending on the hook in use). This is used + * to determine the base network namespace for the lookup. + * + * *tuple_size* must be one of: + * + * **sizeof**\ (*tuple*\ **->ipv4**) + * Look for an IPv4 socket. + * **sizeof**\ (*tuple*\ **->ipv6**) + * Look for an IPv6 socket. + * + * If the *netns* is a negative signed 32-bit integer, then the + * socket lookup table in the netns associated with the *ctx* will + * will be used. For the TC hooks, this is the netns of the device + * in the skb. For socket hooks, this is the netns of the socket. + * If *netns* is any other signed 32-bit value greater than or + * equal to zero then it specifies the ID of the netns relative to + * the netns associated with the *ctx*. *netns* values beyond the + * range of 32-bit integers are reserved for future use. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_NET** configuration option. + * Return + * Pointer to **struct bpf_sock**, or **NULL** in case of failure. + * For sockets with reuseport option, the **struct bpf_sock** + * result is from *reuse*\ **->socks**\ [] using the hash of the + * tuple. + * + * int bpf_sk_release(struct bpf_sock *sock) + * Description + * Release the reference held by *sock*. *sock* must be a + * non-**NULL** pointer that was returned from + * **bpf_sk_lookup_xxx**\ (). + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) + * Description + * Push an element *value* in *map*. *flags* is one of: + * + * **BPF_EXIST** + * If the queue/stack is full, the oldest element is + * removed to make room for this. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_pop_elem(struct bpf_map *map, void *value) + * Description + * Pop an element from *map*. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_peek_elem(struct bpf_map *map, void *value) + * Description + * Get an element from *map* without removing it. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) + * Description + * For socket policies, insert *len* bytes into *msg* at offset + * *start*. + * + * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a + * *msg* it may want to insert metadata or options into the *msg*. + * This can later be read and used by any of the lower layer BPF + * hooks. + * + * This helper may fail if under memory pressure (a malloc + * fails) in these cases BPF programs will get an appropriate + * error and BPF programs will need to handle them. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) + * Description + * Will remove *len* bytes from a *msg* starting at byte *start*. + * This may result in **ENOMEM** errors under certain situations if + * an allocation and copy are required due to a full ring buffer. + * However, the helper will try to avoid doing the allocation + * if possible. Other errors can occur if input parameters are + * invalid either due to *start* byte not being valid part of *msg* + * payload and/or *pop* value being to large. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y) + * Description + * This helper is used in programs implementing IR decoding, to + * report a successfully decoded pointer movement. + * + * The *ctx* should point to the lirc sample as passed into + * the program. + * + * This helper is only available is the kernel was compiled with + * the **CONFIG_BPF_LIRC_MODE2** configuration option set to + * "**y**". + * Return + * 0 + * + * int bpf_spin_lock(struct bpf_spin_lock *lock) + * Description + * Acquire a spinlock represented by the pointer *lock*, which is + * stored as part of a value of a map. Taking the lock allows to + * safely update the rest of the fields in that value. The + * spinlock can (and must) later be released with a call to + * **bpf_spin_unlock**\ (\ *lock*\ ). + * + * Spinlocks in BPF programs come with a number of restrictions + * and constraints: + * + * * **bpf_spin_lock** objects are only allowed inside maps of + * types **BPF_MAP_TYPE_HASH** and **BPF_MAP_TYPE_ARRAY** (this + * list could be extended in the future). + * * BTF description of the map is mandatory. + * * The BPF program can take ONE lock at a time, since taking two + * or more could cause dead locks. + * * Only one **struct bpf_spin_lock** is allowed per map element. + * * When the lock is taken, calls (either BPF to BPF or helpers) + * are not allowed. + * * The **BPF_LD_ABS** and **BPF_LD_IND** instructions are not + * allowed inside a spinlock-ed region. + * * The BPF program MUST call **bpf_spin_unlock**\ () to release + * the lock, on all execution paths, before it returns. + * * The BPF program can access **struct bpf_spin_lock** only via + * the **bpf_spin_lock**\ () and **bpf_spin_unlock**\ () + * helpers. Loading or storing data into the **struct + * bpf_spin_lock** *lock*\ **;** field of a map is not allowed. + * * To use the **bpf_spin_lock**\ () helper, the BTF description + * of the map value must be a struct and have **struct + * bpf_spin_lock** *anyname*\ **;** field at the top level. + * Nested lock inside another struct is not allowed. + * * The **struct bpf_spin_lock** *lock* field in a map value must + * be aligned on a multiple of 4 bytes in that value. + * * Syscall with command **BPF_MAP_LOOKUP_ELEM** does not copy + * the **bpf_spin_lock** field to user space. + * * Syscall with command **BPF_MAP_UPDATE_ELEM**, or update from + * a BPF program, do not update the **bpf_spin_lock** field. + * * **bpf_spin_lock** cannot be on the stack or inside a + * networking packet (it can only be inside of a map values). + * * **bpf_spin_lock** is available to root only. + * * Tracing programs and socket filter programs cannot use + * **bpf_spin_lock**\ () due to insufficient preemption checks + * (but this may change in the future). + * * **bpf_spin_lock** is not allowed in inner maps of map-in-map. + * Return + * 0 + * + * int bpf_spin_unlock(struct bpf_spin_lock *lock) + * Description + * Release the *lock* previously locked by a call to + * **bpf_spin_lock**\ (\ *lock*\ ). + * Return + * 0 + * + * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk) + * Description + * This helper gets a **struct bpf_sock** pointer such + * that all the fields in this **bpf_sock** can be accessed. + * Return + * A **struct bpf_sock** pointer on success, or **NULL** in + * case of failure. + * + * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk) + * Description + * This helper gets a **struct bpf_tcp_sock** pointer from a + * **struct bpf_sock** pointer. + * Return + * A **struct bpf_tcp_sock** pointer on success, or **NULL** in + * case of failure. + * + * int bpf_skb_ecn_set_ce(struct sk_buff *skb) + * Description + * Set ECN (Explicit Congestion Notification) field of IP header + * to **CE** (Congestion Encountered) if current value is **ECT** + * (ECN Capable Transport). Otherwise, do nothing. Works with IPv6 + * and IPv4. + * Return + * 1 if the **CE** flag is set (either by the current helper call + * or because it was already present), 0 if it is not set. + * + * struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk) + * Description + * Return a **struct bpf_sock** pointer in **TCP_LISTEN** state. + * **bpf_sk_release**\ () is unnecessary and not allowed. + * Return + * A **struct bpf_sock** pointer on success, or **NULL** in + * case of failure. + * + * struct bpf_sock *bpf_skc_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) + * Description + * Look for TCP socket matching *tuple*, optionally in a child + * network namespace *netns*. The return value must be checked, + * and if non-**NULL**, released via **bpf_sk_release**\ (). + * + * This function is identical to **bpf_sk_lookup_tcp**\ (), except + * that it also returns timewait or request sockets. Use + * **bpf_sk_fullsock**\ () or **bpf_tcp_sock**\ () to access the + * full structure. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_NET** configuration option. + * Return + * Pointer to **struct bpf_sock**, or **NULL** in case of failure. + * For sockets with reuseport option, the **struct bpf_sock** + * result is from *reuse*\ **->socks**\ [] using the hash of the + * tuple. + * + * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) + * Description + * Check whether *iph* and *th* contain a valid SYN cookie ACK for + * the listening socket in *sk*. + * + * *iph* points to the start of the IPv4 or IPv6 header, while + * *iph_len* contains **sizeof**\ (**struct iphdr**) or + * **sizeof**\ (**struct ip6hdr**). + * + * *th* points to the start of the TCP header, while *th_len* + * contains **sizeof**\ (**struct tcphdr**). + * + * Return + * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative + * error otherwise. + * + * int bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags) + * Description + * Get name of sysctl in /proc/sys/ and copy it into provided by + * program buffer *buf* of size *buf_len*. + * + * The buffer is always NUL terminated, unless it's zero-sized. + * + * If *flags* is zero, full name (e.g. "net/ipv4/tcp_mem") is + * copied. Use **BPF_F_SYSCTL_BASE_NAME** flag to copy base name + * only (e.g. "tcp_mem"). + * Return + * Number of character copied (not including the trailing NUL). + * + * **-E2BIG** if the buffer wasn't big enough (*buf* will contain + * truncated name in this case). + * + * int bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) + * Description + * Get current value of sysctl as it is presented in /proc/sys + * (incl. newline, etc), and copy it as a string into provided + * by program buffer *buf* of size *buf_len*. + * + * The whole value is copied, no matter what file position user + * space issued e.g. sys_read at. + * + * The buffer is always NUL terminated, unless it's zero-sized. + * Return + * Number of character copied (not including the trailing NUL). + * + * **-E2BIG** if the buffer wasn't big enough (*buf* will contain + * truncated name in this case). + * + * **-EINVAL** if current value was unavailable, e.g. because + * sysctl is uninitialized and read returns -EIO for it. + * + * int bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) + * Description + * Get new value being written by user space to sysctl (before + * the actual write happens) and copy it as a string into + * provided by program buffer *buf* of size *buf_len*. + * + * User space may write new value at file position > 0. + * + * The buffer is always NUL terminated, unless it's zero-sized. + * Return + * Number of character copied (not including the trailing NUL). + * + * **-E2BIG** if the buffer wasn't big enough (*buf* will contain + * truncated name in this case). + * + * **-EINVAL** if sysctl is being read. + * + * int bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len) + * Description + * Override new value being written by user space to sysctl with + * value provided by program in buffer *buf* of size *buf_len*. + * + * *buf* should contain a string in same form as provided by user + * space on sysctl write. + * + * User space may write new value at file position > 0. To override + * the whole sysctl value file position should be set to zero. + * Return + * 0 on success. + * + * **-E2BIG** if the *buf_len* is too big. + * + * **-EINVAL** if sysctl is being read. + * + * int bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res) + * Description + * Convert the initial part of the string from buffer *buf* of + * size *buf_len* to a long integer according to the given base + * and save the result in *res*. + * + * The string may begin with an arbitrary amount of white space + * (as determined by **isspace**\ (3)) followed by a single + * optional '**-**' sign. + * + * Five least significant bits of *flags* encode base, other bits + * are currently unused. + * + * Base must be either 8, 10, 16 or 0 to detect it automatically + * similar to user space **strtol**\ (3). + * Return + * Number of characters consumed on success. Must be positive but + * no more than *buf_len*. + * + * **-EINVAL** if no valid digits were found or unsupported base + * was provided. + * + * **-ERANGE** if resulting value was out of range. + * + * int bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res) + * Description + * Convert the initial part of the string from buffer *buf* of + * size *buf_len* to an unsigned long integer according to the + * given base and save the result in *res*. + * + * The string may begin with an arbitrary amount of white space + * (as determined by **isspace**\ (3)). + * + * Five least significant bits of *flags* encode base, other bits + * are currently unused. + * + * Base must be either 8, 10, 16 or 0 to detect it automatically + * similar to user space **strtoul**\ (3). + * Return + * Number of characters consumed on success. Must be positive but + * no more than *buf_len*. + * + * **-EINVAL** if no valid digits were found or unsupported base + * was provided. + * + * **-ERANGE** if resulting value was out of range. + * + * void *bpf_sk_storage_get(struct bpf_map *map, struct bpf_sock *sk, void *value, u64 flags) + * Description + * Get a bpf-local-storage from a *sk*. + * + * Logically, it could be thought of getting the value from + * a *map* with *sk* as the **key**. From this + * perspective, the usage is not much different from + * **bpf_map_lookup_elem**\ (*map*, **&**\ *sk*) except this + * helper enforces the key must be a full socket and the map must + * be a **BPF_MAP_TYPE_SK_STORAGE** also. + * + * Underneath, the value is stored locally at *sk* instead of + * the *map*. The *map* is used as the bpf-local-storage + * "type". The bpf-local-storage "type" (i.e. the *map*) is + * searched against all bpf-local-storages residing at *sk*. + * + * An optional *flags* (**BPF_SK_STORAGE_GET_F_CREATE**) can be + * used such that a new bpf-local-storage will be + * created if one does not exist. *value* can be used + * together with **BPF_SK_STORAGE_GET_F_CREATE** to specify + * the initial value of a bpf-local-storage. If *value* is + * **NULL**, the new bpf-local-storage will be zero initialized. + * Return + * A bpf-local-storage pointer is returned on success. + * + * **NULL** if not found or there was an error in adding + * a new bpf-local-storage. + * + * int bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk) + * Description + * Delete a bpf-local-storage from a *sk*. + * Return + * 0 on success. + * + * **-ENOENT** if the bpf-local-storage cannot be found. + * + * int bpf_send_signal(u32 sig) + * Description + * Send signal *sig* to the current task. + * Return + * 0 on success or successfully queued. + * + * **-EBUSY** if work queue under nmi is full. + * + * **-EINVAL** if *sig* is invalid. + * + * **-EPERM** if no permission to send the *sig*. + * + * **-EAGAIN** if bpf program can try again. + * + * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) + * Description + * Try to issue a SYN cookie for the packet with corresponding + * IP/TCP headers, *iph* and *th*, on the listening socket in *sk*. + * + * *iph* points to the start of the IPv4 or IPv6 header, while + * *iph_len* contains **sizeof**\ (**struct iphdr**) or + * **sizeof**\ (**struct ip6hdr**). + * + * *th* points to the start of the TCP header, while *th_len* + * contains the length of the TCP header. + * + * Return + * On success, lower 32 bits hold the generated SYN cookie in + * followed by 16 bits which hold the MSS value for that cookie, + * and the top 16 bits are unused. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** SYN cookie cannot be issued due to error + * + * **-ENOENT** SYN cookie should not be issued (no SYN flood) + * + * **-EOPNOTSUPP** kernel configuration does not enable SYN cookies + * + * **-EPROTONOSUPPORT** IP packet version is not 4 or 6 + * + * int bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * Description + * Write raw *data* blob into a special BPF perf event held by + * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf + * event must have the following attributes: **PERF_SAMPLE_RAW** + * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and + * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. + * + * The *flags* are used to indicate the index in *map* for which + * the value must be put, masked with **BPF_F_INDEX_MASK**. + * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** + * to indicate that the index of the current CPU core should be + * used. + * + * The value to write, of *size*, is passed through eBPF stack and + * pointed by *data*. + * + * *ctx* is a pointer to in-kernel struct sk_buff. + * + * This helper is similar to **bpf_perf_event_output**\ () but + * restricted to raw_tracepoint bpf programs. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr) + * Description + * Safely attempt to read *size* bytes from user space address + * *unsafe_ptr* and store the data in *dst*. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr) + * Description + * Safely attempt to read *size* bytes from kernel space address + * *unsafe_ptr* and store the data in *dst*. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr) + * Description + * Copy a NUL terminated string from an unsafe user address + * *unsafe_ptr* to *dst*. The *size* should include the + * terminating NUL byte. In case the string length is smaller than + * *size*, the target is not padded with further NUL bytes. If the + * string length is larger than *size*, just *size*-1 bytes are + * copied and the last byte is set to NUL. + * + * On success, the length of the copied string is returned. This + * makes this helper useful in tracing programs for reading + * strings, and more importantly to get its length at runtime. See + * the following snippet: + * + * :: + * + * SEC("kprobe/sys_open") + * void bpf_sys_open(struct pt_regs *ctx) + * { + * char buf[PATHLEN]; // PATHLEN is defined to 256 + * int res = bpf_probe_read_user_str(buf, sizeof(buf), + * ctx->di); + * + * // Consume buf, for example push it to + * // userspace via bpf_perf_event_output(); we + * // can use res (the string length) as event + * // size, after checking its boundaries. + * } + * + * In comparison, using **bpf_probe_read_user()** helper here + * instead to read the string would require to estimate the length + * at compile time, and would often result in copying more memory + * than necessary. + * + * Another useful use case is when parsing individual process + * arguments or individual environment variables navigating + * *current*\ **->mm->arg_start** and *current*\ + * **->mm->env_start**: using this helper and the return value, + * one can quickly iterate at the right offset of the memory area. + * Return + * On success, the strictly positive length of the string, + * including the trailing NUL character. On error, a negative + * value. + * + * int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr) + * Description + * Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr* + * to *dst*. Same semantics as with bpf_probe_read_user_str() apply. + * Return + * On success, the strictly positive length of the string, including + * the trailing NUL character. On error, a negative value. + */ +#define __BPF_FUNC_MAPPER(FN) \ + FN(unspec), \ + FN(map_lookup_elem), \ + FN(map_update_elem), \ + FN(map_delete_elem), \ + FN(probe_read), \ + FN(ktime_get_ns), \ + FN(trace_printk), \ + FN(get_prandom_u32), \ + FN(get_smp_processor_id), \ + FN(skb_store_bytes), \ + FN(l3_csum_replace), \ + FN(l4_csum_replace), \ + FN(tail_call), \ + FN(clone_redirect), \ + FN(get_current_pid_tgid), \ + FN(get_current_uid_gid), \ + FN(get_current_comm), \ + FN(get_cgroup_classid), \ + FN(skb_vlan_push), \ + FN(skb_vlan_pop), \ + FN(skb_get_tunnel_key), \ + FN(skb_set_tunnel_key), \ + FN(perf_event_read), \ + FN(redirect), \ + FN(get_route_realm), \ + FN(perf_event_output), \ + FN(skb_load_bytes), \ + FN(get_stackid), \ + FN(csum_diff), \ + FN(skb_get_tunnel_opt), \ + FN(skb_set_tunnel_opt), \ + FN(skb_change_proto), \ + FN(skb_change_type), \ + FN(skb_under_cgroup), \ + FN(get_hash_recalc), \ + FN(get_current_task), \ + FN(probe_write_user), \ + FN(current_task_under_cgroup), \ + FN(skb_change_tail), \ + FN(skb_pull_data), \ + FN(csum_update), \ + FN(set_hash_invalid), \ + FN(get_numa_node_id), \ + FN(skb_change_head), \ + FN(xdp_adjust_head), \ + FN(probe_read_str), \ + FN(get_socket_cookie), \ + FN(get_socket_uid), \ + FN(set_hash), \ + FN(setsockopt), \ + FN(skb_adjust_room), \ + FN(redirect_map), \ + FN(sk_redirect_map), \ + FN(sock_map_update), \ + FN(xdp_adjust_meta), \ + FN(perf_event_read_value), \ + FN(perf_prog_read_value), \ + FN(getsockopt), \ + FN(override_return), \ + FN(sock_ops_cb_flags_set), \ + FN(msg_redirect_map), \ + FN(msg_apply_bytes), \ + FN(msg_cork_bytes), \ + FN(msg_pull_data), \ + FN(bind), \ + FN(xdp_adjust_tail), \ + FN(skb_get_xfrm_state), \ + FN(get_stack), \ + FN(skb_load_bytes_relative), \ + FN(fib_lookup), \ + FN(sock_hash_update), \ + FN(msg_redirect_hash), \ + FN(sk_redirect_hash), \ + FN(lwt_push_encap), \ + FN(lwt_seg6_store_bytes), \ + FN(lwt_seg6_adjust_srh), \ + FN(lwt_seg6_action), \ + FN(rc_repeat), \ + FN(rc_keydown), \ + FN(skb_cgroup_id), \ + FN(get_current_cgroup_id), \ + FN(get_local_storage), \ + FN(sk_select_reuseport), \ + FN(skb_ancestor_cgroup_id), \ + FN(sk_lookup_tcp), \ + FN(sk_lookup_udp), \ + FN(sk_release), \ + FN(map_push_elem), \ + FN(map_pop_elem), \ + FN(map_peek_elem), \ + FN(msg_push_data), \ + FN(msg_pop_data), \ + FN(rc_pointer_rel), \ + FN(spin_lock), \ + FN(spin_unlock), \ + FN(sk_fullsock), \ + FN(tcp_sock), \ + FN(skb_ecn_set_ce), \ + FN(get_listener_sock), \ + FN(skc_lookup_tcp), \ + FN(tcp_check_syncookie), \ + FN(sysctl_get_name), \ + FN(sysctl_get_current_value), \ + FN(sysctl_get_new_value), \ + FN(sysctl_set_new_value), \ + FN(strtol), \ + FN(strtoul), \ + FN(sk_storage_get), \ + FN(sk_storage_delete), \ + FN(send_signal), \ + FN(tcp_gen_syncookie), \ + FN(skb_output), \ + FN(probe_read_user), \ + FN(probe_read_kernel), \ + FN(probe_read_user_str), \ + FN(probe_read_kernel_str), + +/* integer value in 'imm' field of BPF_CALL instruction selects which helper + * function eBPF program intends to call + */ +#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x +enum bpf_func_id { + __BPF_FUNC_MAPPER(__BPF_ENUM_FN) + __BPF_FUNC_MAX_ID, +}; +#undef __BPF_ENUM_FN + +/* All flags used by eBPF helper functions, placed here. */ + +/* BPF_FUNC_skb_store_bytes flags. */ +#define BPF_F_RECOMPUTE_CSUM (1ULL << 0) +#define BPF_F_INVALIDATE_HASH (1ULL << 1) + +/* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags. + * First 4 bits are for passing the header field size. + */ +#define BPF_F_HDR_FIELD_MASK 0xfULL + +/* BPF_FUNC_l4_csum_replace flags. */ +#define BPF_F_PSEUDO_HDR (1ULL << 4) +#define BPF_F_MARK_MANGLED_0 (1ULL << 5) +#define BPF_F_MARK_ENFORCE (1ULL << 6) + +/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */ +#define BPF_F_INGRESS (1ULL << 0) + +/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ +#define BPF_F_TUNINFO_IPV6 (1ULL << 0) + +/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */ +#define BPF_F_SKIP_FIELD_MASK 0xffULL +#define BPF_F_USER_STACK (1ULL << 8) +/* flags used by BPF_FUNC_get_stackid only. */ +#define BPF_F_FAST_STACK_CMP (1ULL << 9) +#define BPF_F_REUSE_STACKID (1ULL << 10) +/* flags used by BPF_FUNC_get_stack only. */ +#define BPF_F_USER_BUILD_ID (1ULL << 11) + +/* BPF_FUNC_skb_set_tunnel_key flags. */ +#define BPF_F_ZERO_CSUM_TX (1ULL << 1) +#define BPF_F_DONT_FRAGMENT (1ULL << 2) +#define BPF_F_SEQ_NUMBER (1ULL << 3) + +/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and + * BPF_FUNC_perf_event_read_value flags. + */ +#define BPF_F_INDEX_MASK 0xffffffffULL +#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK +/* BPF_FUNC_perf_event_output for sk_buff input context. */ +#define BPF_F_CTXLEN_MASK (0xfffffULL << 32) + +/* Current network namespace */ +#define BPF_F_CURRENT_NETNS (-1L) + +/* BPF_FUNC_skb_adjust_room flags. */ +#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0) + +#define BPF_ADJ_ROOM_ENCAP_L2_MASK 0xff +#define BPF_ADJ_ROOM_ENCAP_L2_SHIFT 56 + +#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1) +#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2) +#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3) +#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4) +#define BPF_F_ADJ_ROOM_ENCAP_L2(len) (((__u64)len & \ + BPF_ADJ_ROOM_ENCAP_L2_MASK) \ + << BPF_ADJ_ROOM_ENCAP_L2_SHIFT) + +/* BPF_FUNC_sysctl_get_name flags. */ +#define BPF_F_SYSCTL_BASE_NAME (1ULL << 0) + +/* BPF_FUNC_sk_storage_get flags */ +#define BPF_SK_STORAGE_GET_F_CREATE (1ULL << 0) + +/* Mode for BPF_FUNC_skb_adjust_room helper. */ +enum bpf_adj_room_mode { + BPF_ADJ_ROOM_NET, + BPF_ADJ_ROOM_MAC, +}; + +/* Mode for BPF_FUNC_skb_load_bytes_relative helper. */ +enum bpf_hdr_start_off { + BPF_HDR_START_MAC, + BPF_HDR_START_NET, +}; + +/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */ +enum bpf_lwt_encap_mode { + BPF_LWT_ENCAP_SEG6, + BPF_LWT_ENCAP_SEG6_INLINE, + BPF_LWT_ENCAP_IP, +}; + +#define __bpf_md_ptr(type, name) \ +union { \ + type name; \ + __u64 :64; \ +} __attribute__((aligned(8))) + +/* user accessible mirror of in-kernel sk_buff. + * new fields can only be added to the end of this structure + */ +struct __sk_buff { + __u32 len; + __u32 pkt_type; + __u32 mark; + __u32 queue_mapping; + __u32 protocol; + __u32 vlan_present; + __u32 vlan_tci; + __u32 vlan_proto; + __u32 priority; + __u32 ingress_ifindex; + __u32 ifindex; + __u32 tc_index; + __u32 cb[5]; + __u32 hash; + __u32 tc_classid; + __u32 data; + __u32 data_end; + __u32 napi_id; + + /* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... */ + __u32 family; + __u32 remote_ip4; /* Stored in network byte order */ + __u32 local_ip4; /* Stored in network byte order */ + __u32 remote_ip6[4]; /* Stored in network byte order */ + __u32 local_ip6[4]; /* Stored in network byte order */ + __u32 remote_port; /* Stored in network byte order */ + __u32 local_port; /* stored in host byte order */ + /* ... here. */ + + __u32 data_meta; + __bpf_md_ptr(struct bpf_flow_keys *, flow_keys); + __u64 tstamp; + __u32 wire_len; + __u32 gso_segs; + __bpf_md_ptr(struct bpf_sock *, sk); +}; + +struct bpf_tunnel_key { + __u32 tunnel_id; + union { + __u32 remote_ipv4; + __u32 remote_ipv6[4]; + }; + __u8 tunnel_tos; + __u8 tunnel_ttl; + __u16 tunnel_ext; /* Padding, future use. */ + __u32 tunnel_label; +}; + +/* user accessible mirror of in-kernel xfrm_state. + * new fields can only be added to the end of this structure + */ +struct bpf_xfrm_state { + __u32 reqid; + __u32 spi; /* Stored in network byte order */ + __u16 family; + __u16 ext; /* Padding, future use. */ + union { + __u32 remote_ipv4; /* Stored in network byte order */ + __u32 remote_ipv6[4]; /* Stored in network byte order */ + }; +}; + +/* Generic BPF return codes which all BPF program types may support. + * The values are binary compatible with their TC_ACT_* counter-part to + * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT + * programs. + * + * XDP is handled seprately, see XDP_*. + */ +enum bpf_ret_code { + BPF_OK = 0, + /* 1 reserved */ + BPF_DROP = 2, + /* 3-6 reserved */ + BPF_REDIRECT = 7, + /* >127 are reserved for prog type specific return codes. + * + * BPF_LWT_REROUTE: used by BPF_PROG_TYPE_LWT_IN and + * BPF_PROG_TYPE_LWT_XMIT to indicate that skb had been + * changed and should be routed based on its new L3 header. + * (This is an L3 redirect, as opposed to L2 redirect + * represented by BPF_REDIRECT above). + */ + BPF_LWT_REROUTE = 128, +}; + +struct bpf_sock { + __u32 bound_dev_if; + __u32 family; + __u32 type; + __u32 protocol; + __u32 mark; + __u32 priority; + /* IP address also allows 1 and 2 bytes access */ + __u32 src_ip4; + __u32 src_ip6[4]; + __u32 src_port; /* host byte order */ + __u32 dst_port; /* network byte order */ + __u32 dst_ip4; + __u32 dst_ip6[4]; + __u32 state; +}; + +struct bpf_tcp_sock { + __u32 snd_cwnd; /* Sending congestion window */ + __u32 srtt_us; /* smoothed round trip time << 3 in usecs */ + __u32 rtt_min; + __u32 snd_ssthresh; /* Slow start size threshold */ + __u32 rcv_nxt; /* What we want to receive next */ + __u32 snd_nxt; /* Next sequence we send */ + __u32 snd_una; /* First byte we want an ack for */ + __u32 mss_cache; /* Cached effective mss, not including SACKS */ + __u32 ecn_flags; /* ECN status bits. */ + __u32 rate_delivered; /* saved rate sample: packets delivered */ + __u32 rate_interval_us; /* saved rate sample: time elapsed */ + __u32 packets_out; /* Packets which are "in flight" */ + __u32 retrans_out; /* Retransmitted packets out */ + __u32 total_retrans; /* Total retransmits for entire connection */ + __u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn + * total number of segments in. + */ + __u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn + * total number of data segments in. + */ + __u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut + * The total number of segments sent. + */ + __u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut + * total number of data segments sent. + */ + __u32 lost_out; /* Lost packets */ + __u32 sacked_out; /* SACK'd packets */ + __u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived + * sum(delta(rcv_nxt)), or how many bytes + * were acked. + */ + __u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked + * sum(delta(snd_una)), or how many bytes + * were acked. + */ + __u32 dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups + * total number of DSACK blocks received + */ + __u32 delivered; /* Total data packets delivered incl. rexmits */ + __u32 delivered_ce; /* Like the above but only ECE marked packets */ + __u32 icsk_retransmits; /* Number of unrecovered [RTO] timeouts */ +}; + +struct bpf_sock_tuple { + union { + struct { + __be32 saddr; + __be32 daddr; + __be16 sport; + __be16 dport; + } ipv4; + struct { + __be32 saddr[4]; + __be32 daddr[4]; + __be16 sport; + __be16 dport; + } ipv6; + }; +}; + +struct bpf_xdp_sock { + __u32 queue_id; +}; + +#define XDP_PACKET_HEADROOM 256 + +/* User return codes for XDP prog type. + * A valid XDP program must return one of these defined values. All other + * return codes are reserved for future use. Unknown return codes will + * result in packet drops and a warning via bpf_warn_invalid_xdp_action(). + */ +enum xdp_action { + XDP_ABORTED = 0, + XDP_DROP, + XDP_PASS, + XDP_TX, + XDP_REDIRECT, +}; + +/* user accessible metadata for XDP packet hook + * new fields must be added to the end of this structure + */ +struct xdp_md { + __u32 data; + __u32 data_end; + __u32 data_meta; + /* Below access go through struct xdp_rxq_info */ + __u32 ingress_ifindex; /* rxq->dev->ifindex */ + __u32 rx_queue_index; /* rxq->queue_index */ +}; + +enum sk_action { + SK_DROP = 0, + SK_PASS, +}; + +/* user accessible metadata for SK_MSG packet hook, new fields must + * be added to the end of this structure + */ +struct sk_msg_md { + __bpf_md_ptr(void *, data); + __bpf_md_ptr(void *, data_end); + + __u32 family; + __u32 remote_ip4; /* Stored in network byte order */ + __u32 local_ip4; /* Stored in network byte order */ + __u32 remote_ip6[4]; /* Stored in network byte order */ + __u32 local_ip6[4]; /* Stored in network byte order */ + __u32 remote_port; /* Stored in network byte order */ + __u32 local_port; /* stored in host byte order */ + __u32 size; /* Total size of sk_msg */ +}; + +struct sk_reuseport_md { + /* + * Start of directly accessible data. It begins from + * the tcp/udp header. + */ + __bpf_md_ptr(void *, data); + /* End of directly accessible data */ + __bpf_md_ptr(void *, data_end); + /* + * Total length of packet (starting from the tcp/udp header). + * Note that the directly accessible bytes (data_end - data) + * could be less than this "len". Those bytes could be + * indirectly read by a helper "bpf_skb_load_bytes()". + */ + __u32 len; + /* + * Eth protocol in the mac header (network byte order). e.g. + * ETH_P_IP(0x0800) and ETH_P_IPV6(0x86DD) + */ + __u32 eth_protocol; + __u32 ip_protocol; /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */ + __u32 bind_inany; /* Is sock bound to an INANY address? */ + __u32 hash; /* A hash of the packet 4 tuples */ +}; + +#define BPF_TAG_SIZE 8 + +struct bpf_prog_info { + __u32 type; + __u32 id; + __u8 tag[BPF_TAG_SIZE]; + __u32 jited_prog_len; + __u32 xlated_prog_len; + __aligned_u64 jited_prog_insns; + __aligned_u64 xlated_prog_insns; + __u64 load_time; /* ns since boottime */ + __u32 created_by_uid; + __u32 nr_map_ids; + __aligned_u64 map_ids; + char name[BPF_OBJ_NAME_LEN]; + __u32 ifindex; + __u32 gpl_compatible:1; + __u32 :31; /* alignment pad */ + __u64 netns_dev; + __u64 netns_ino; + __u32 nr_jited_ksyms; + __u32 nr_jited_func_lens; + __aligned_u64 jited_ksyms; + __aligned_u64 jited_func_lens; + __u32 btf_id; + __u32 func_info_rec_size; + __aligned_u64 func_info; + __u32 nr_func_info; + __u32 nr_line_info; + __aligned_u64 line_info; + __aligned_u64 jited_line_info; + __u32 nr_jited_line_info; + __u32 line_info_rec_size; + __u32 jited_line_info_rec_size; + __u32 nr_prog_tags; + __aligned_u64 prog_tags; + __u64 run_time_ns; + __u64 run_cnt; +} __attribute__((aligned(8))); + +struct bpf_map_info { + __u32 type; + __u32 id; + __u32 key_size; + __u32 value_size; + __u32 max_entries; + __u32 map_flags; + char name[BPF_OBJ_NAME_LEN]; + __u32 ifindex; + __u32 :32; + __u64 netns_dev; + __u64 netns_ino; + __u32 btf_id; + __u32 btf_key_type_id; + __u32 btf_value_type_id; +} __attribute__((aligned(8))); + +struct bpf_btf_info { + __aligned_u64 btf; + __u32 btf_size; + __u32 id; +} __attribute__((aligned(8))); + +/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed + * by user and intended to be used by socket (e.g. to bind to, depends on + * attach attach type). + */ +struct bpf_sock_addr { + __u32 user_family; /* Allows 4-byte read, but no write. */ + __u32 user_ip4; /* Allows 1,2,4-byte read and 4-byte write. + * Stored in network byte order. + */ + __u32 user_ip6[4]; /* Allows 1,2,4,8-byte read and 4,8-byte write. + * Stored in network byte order. + */ + __u32 user_port; /* Allows 4-byte read and write. + * Stored in network byte order + */ + __u32 family; /* Allows 4-byte read, but no write */ + __u32 type; /* Allows 4-byte read, but no write */ + __u32 protocol; /* Allows 4-byte read, but no write */ + __u32 msg_src_ip4; /* Allows 1,2,4-byte read and 4-byte write. + * Stored in network byte order. + */ + __u32 msg_src_ip6[4]; /* Allows 1,2,4,8-byte read and 4,8-byte write. + * Stored in network byte order. + */ + __bpf_md_ptr(struct bpf_sock *, sk); +}; + +/* User bpf_sock_ops struct to access socket values and specify request ops + * and their replies. + * Some of this fields are in network (bigendian) byte order and may need + * to be converted before use (bpf_ntohl() defined in samples/bpf/bpf_endian.h). + * New fields can only be added at the end of this structure + */ +struct bpf_sock_ops { + __u32 op; + union { + __u32 args[4]; /* Optionally passed to bpf program */ + __u32 reply; /* Returned by bpf program */ + __u32 replylong[4]; /* Optionally returned by bpf prog */ + }; + __u32 family; + __u32 remote_ip4; /* Stored in network byte order */ + __u32 local_ip4; /* Stored in network byte order */ + __u32 remote_ip6[4]; /* Stored in network byte order */ + __u32 local_ip6[4]; /* Stored in network byte order */ + __u32 remote_port; /* Stored in network byte order */ + __u32 local_port; /* stored in host byte order */ + __u32 is_fullsock; /* Some TCP fields are only valid if + * there is a full socket. If not, the + * fields read as zero. + */ + __u32 snd_cwnd; + __u32 srtt_us; /* Averaged RTT << 3 in usecs */ + __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */ + __u32 state; + __u32 rtt_min; + __u32 snd_ssthresh; + __u32 rcv_nxt; + __u32 snd_nxt; + __u32 snd_una; + __u32 mss_cache; + __u32 ecn_flags; + __u32 rate_delivered; + __u32 rate_interval_us; + __u32 packets_out; + __u32 retrans_out; + __u32 total_retrans; + __u32 segs_in; + __u32 data_segs_in; + __u32 segs_out; + __u32 data_segs_out; + __u32 lost_out; + __u32 sacked_out; + __u32 sk_txhash; + __u64 bytes_received; + __u64 bytes_acked; + __bpf_md_ptr(struct bpf_sock *, sk); +}; + +/* Definitions for bpf_sock_ops_cb_flags */ +#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) +#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1) +#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2) +#define BPF_SOCK_OPS_RTT_CB_FLAG (1<<3) +#define BPF_SOCK_OPS_ALL_CB_FLAGS 0xF /* Mask of all currently + * supported cb flags + */ + +/* List of known BPF sock_ops operators. + * New entries can only be added at the end + */ +enum { + BPF_SOCK_OPS_VOID, + BPF_SOCK_OPS_TIMEOUT_INIT, /* Should return SYN-RTO value to use or + * -1 if default value should be used + */ + BPF_SOCK_OPS_RWND_INIT, /* Should return initial advertized + * window (in packets) or -1 if default + * value should be used + */ + BPF_SOCK_OPS_TCP_CONNECT_CB, /* Calls BPF program right before an + * active connection is initialized + */ + BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB, /* Calls BPF program when an + * active connection is + * established + */ + BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, /* Calls BPF program when a + * passive connection is + * established + */ + BPF_SOCK_OPS_NEEDS_ECN, /* If connection's congestion control + * needs ECN + */ + BPF_SOCK_OPS_BASE_RTT, /* Get base RTT. The correct value is + * based on the path and may be + * dependent on the congestion control + * algorithm. In general it indicates + * a congestion threshold. RTTs above + * this indicate congestion + */ + BPF_SOCK_OPS_RTO_CB, /* Called when an RTO has triggered. + * Arg1: value of icsk_retransmits + * Arg2: value of icsk_rto + * Arg3: whether RTO has expired + */ + BPF_SOCK_OPS_RETRANS_CB, /* Called when skb is retransmitted. + * Arg1: sequence number of 1st byte + * Arg2: # segments + * Arg3: return value of + * tcp_transmit_skb (0 => success) + */ + BPF_SOCK_OPS_STATE_CB, /* Called when TCP changes state. + * Arg1: old_state + * Arg2: new_state + */ + BPF_SOCK_OPS_TCP_LISTEN_CB, /* Called on listen(2), right after + * socket transition to LISTEN state. + */ + BPF_SOCK_OPS_RTT_CB, /* Called on every RTT. + */ +}; + +/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect + * changes between the TCP and BPF versions. Ideally this should never happen. + * If it does, we need to add code to convert them before calling + * the BPF sock_ops function. + */ +enum { + BPF_TCP_ESTABLISHED = 1, + BPF_TCP_SYN_SENT, + BPF_TCP_SYN_RECV, + BPF_TCP_FIN_WAIT1, + BPF_TCP_FIN_WAIT2, + BPF_TCP_TIME_WAIT, + BPF_TCP_CLOSE, + BPF_TCP_CLOSE_WAIT, + BPF_TCP_LAST_ACK, + BPF_TCP_LISTEN, + BPF_TCP_CLOSING, /* Now a valid state */ + BPF_TCP_NEW_SYN_RECV, + + BPF_TCP_MAX_STATES /* Leave at the end! */ +}; + +#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ +#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */ + +struct bpf_perf_event_value { + __u64 counter; + __u64 enabled; + __u64 running; +}; + +#define BPF_DEVCG_ACC_MKNOD (1ULL << 0) +#define BPF_DEVCG_ACC_READ (1ULL << 1) +#define BPF_DEVCG_ACC_WRITE (1ULL << 2) + +#define BPF_DEVCG_DEV_BLOCK (1ULL << 0) +#define BPF_DEVCG_DEV_CHAR (1ULL << 1) + +struct bpf_cgroup_dev_ctx { + /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */ + __u32 access_type; + __u32 major; + __u32 minor; +}; + +struct bpf_raw_tracepoint_args { + __u64 args[0]; +}; + +/* DIRECT: Skip the FIB rules and go to FIB table associated with device + * OUTPUT: Do lookup from egress perspective; default is ingress + */ +#define BPF_FIB_LOOKUP_DIRECT (1U << 0) +#define BPF_FIB_LOOKUP_OUTPUT (1U << 1) + +enum { + BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */ + BPF_FIB_LKUP_RET_BLACKHOLE, /* dest is blackholed; can be dropped */ + BPF_FIB_LKUP_RET_UNREACHABLE, /* dest is unreachable; can be dropped */ + BPF_FIB_LKUP_RET_PROHIBIT, /* dest not allowed; can be dropped */ + BPF_FIB_LKUP_RET_NOT_FWDED, /* packet is not forwarded */ + BPF_FIB_LKUP_RET_FWD_DISABLED, /* fwding is not enabled on ingress */ + BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ + BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ + BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ +}; + +struct bpf_fib_lookup { + /* input: network family for lookup (AF_INET, AF_INET6) + * output: network family of egress nexthop + */ + __u8 family; + + /* set if lookup is to consider L4 data - e.g., FIB rules */ + __u8 l4_protocol; + __be16 sport; + __be16 dport; + + /* total length of packet from network header - used for MTU check */ + __u16 tot_len; + + /* input: L3 device index for lookup + * output: device index from FIB lookup + */ + __u32 ifindex; + + union { + /* inputs to lookup */ + __u8 tos; /* AF_INET */ + __be32 flowinfo; /* AF_INET6, flow_label + priority */ + + /* output: metric of fib result (IPv4/IPv6 only) */ + __u32 rt_metric; + }; + + union { + __be32 ipv4_src; + __u32 ipv6_src[4]; /* in6_addr; network order */ + }; + + /* input to bpf_fib_lookup, ipv{4,6}_dst is destination address in + * network header. output: bpf_fib_lookup sets to gateway address + * if FIB lookup returns gateway route + */ + union { + __be32 ipv4_dst; + __u32 ipv6_dst[4]; /* in6_addr; network order */ + }; + + /* output */ + __be16 h_vlan_proto; + __be16 h_vlan_TCI; + __u8 smac[6]; /* ETH_ALEN */ + __u8 dmac[6]; /* ETH_ALEN */ +}; + +enum bpf_task_fd_type { + BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */ + BPF_FD_TYPE_TRACEPOINT, /* tp name */ + BPF_FD_TYPE_KPROBE, /* (symbol + offset) or addr */ + BPF_FD_TYPE_KRETPROBE, /* (symbol + offset) or addr */ + BPF_FD_TYPE_UPROBE, /* filename + offset */ + BPF_FD_TYPE_URETPROBE, /* filename + offset */ +}; + +#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0) +#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1) +#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2) + +struct bpf_flow_keys { + __u16 nhoff; + __u16 thoff; + __u16 addr_proto; /* ETH_P_* of valid addrs */ + __u8 is_frag; + __u8 is_first_frag; + __u8 is_encap; + __u8 ip_proto; + __be16 n_proto; + __be16 sport; + __be16 dport; + union { + struct { + __be32 ipv4_src; + __be32 ipv4_dst; + }; + struct { + __u32 ipv6_src[4]; /* in6_addr; network order */ + __u32 ipv6_dst[4]; /* in6_addr; network order */ + }; + }; + __u32 flags; + __be32 flow_label; +}; + +struct bpf_func_info { + __u32 insn_off; + __u32 type_id; +}; + +#define BPF_LINE_INFO_LINE_NUM(line_col) ((line_col) >> 10) +#define BPF_LINE_INFO_LINE_COL(line_col) ((line_col) & 0x3ff) + +struct bpf_line_info { + __u32 insn_off; + __u32 file_name_off; + __u32 line_off; + __u32 line_col; +}; + +struct bpf_spin_lock { + __u32 val; +}; + +struct bpf_sysctl { + __u32 write; /* Sysctl is being read (= 0) or written (= 1). + * Allows 1,2,4-byte read, but no write. + */ + __u32 file_pos; /* Sysctl file position to read from, write to. + * Allows 1,2,4-byte read an 4-byte write. + */ +}; + +struct bpf_sockopt { + __bpf_md_ptr(struct bpf_sock *, sk); + __bpf_md_ptr(void *, optval); + __bpf_md_ptr(void *, optval_end); + + __s32 level; + __s32 optname; + __s32 optlen; + __s32 retval; +}; + +#endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/src/contrib/libbpf/include/uapi/linux/bpf_common.h b/src/contrib/libbpf/include/uapi/linux/bpf_common.h new file mode 100644 index 0000000..ee97668 --- /dev/null +++ b/src/contrib/libbpf/include/uapi/linux/bpf_common.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_BPF_COMMON_H__ +#define _UAPI__LINUX_BPF_COMMON_H__ + +/* Instruction classes */ +#define BPF_CLASS(code) ((code) & 0x07) +#define BPF_LD 0x00 +#define BPF_LDX 0x01 +#define BPF_ST 0x02 +#define BPF_STX 0x03 +#define BPF_ALU 0x04 +#define BPF_JMP 0x05 +#define BPF_RET 0x06 +#define BPF_MISC 0x07 + +/* ld/ldx fields */ +#define BPF_SIZE(code) ((code) & 0x18) +#define BPF_W 0x00 /* 32-bit */ +#define BPF_H 0x08 /* 16-bit */ +#define BPF_B 0x10 /* 8-bit */ +/* eBPF BPF_DW 0x18 64-bit */ +#define BPF_MODE(code) ((code) & 0xe0) +#define BPF_IMM 0x00 +#define BPF_ABS 0x20 +#define BPF_IND 0x40 +#define BPF_MEM 0x60 +#define BPF_LEN 0x80 +#define BPF_MSH 0xa0 + +/* alu/jmp fields */ +#define BPF_OP(code) ((code) & 0xf0) +#define BPF_ADD 0x00 +#define BPF_SUB 0x10 +#define BPF_MUL 0x20 +#define BPF_DIV 0x30 +#define BPF_OR 0x40 +#define BPF_AND 0x50 +#define BPF_LSH 0x60 +#define BPF_RSH 0x70 +#define BPF_NEG 0x80 +#define BPF_MOD 0x90 +#define BPF_XOR 0xa0 + +#define BPF_JA 0x00 +#define BPF_JEQ 0x10 +#define BPF_JGT 0x20 +#define BPF_JGE 0x30 +#define BPF_JSET 0x40 +#define BPF_SRC(code) ((code) & 0x08) +#define BPF_K 0x00 +#define BPF_X 0x08 + +#ifndef BPF_MAXINSNS +#define BPF_MAXINSNS 4096 +#endif + +#endif /* _UAPI__LINUX_BPF_COMMON_H__ */ diff --git a/src/contrib/libbpf/include/uapi/linux/btf.h b/src/contrib/libbpf/include/uapi/linux/btf.h new file mode 100644 index 0000000..63ae4a3 --- /dev/null +++ b/src/contrib/libbpf/include/uapi/linux/btf.h @@ -0,0 +1,165 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* Copyright (c) 2018 Facebook */ +#ifndef _UAPI__LINUX_BTF_H__ +#define _UAPI__LINUX_BTF_H__ + +#include <linux/types.h> + +#define BTF_MAGIC 0xeB9F +#define BTF_VERSION 1 + +struct btf_header { + __u16 magic; + __u8 version; + __u8 flags; + __u32 hdr_len; + + /* All offsets are in bytes relative to the end of this header */ + __u32 type_off; /* offset of type section */ + __u32 type_len; /* length of type section */ + __u32 str_off; /* offset of string section */ + __u32 str_len; /* length of string section */ +}; + +/* Max # of type identifier */ +#define BTF_MAX_TYPE 0x0000ffff +/* Max offset into the string section */ +#define BTF_MAX_NAME_OFFSET 0x0000ffff +/* Max # of struct/union/enum members or func args */ +#define BTF_MAX_VLEN 0xffff + +struct btf_type { + __u32 name_off; + /* "info" bits arrangement + * bits 0-15: vlen (e.g. # of struct's members) + * bits 16-23: unused + * bits 24-27: kind (e.g. int, ptr, array...etc) + * bits 28-30: unused + * bit 31: kind_flag, currently used by + * struct, union and fwd + */ + __u32 info; + /* "size" is used by INT, ENUM, STRUCT, UNION and DATASEC. + * "size" tells the size of the type it is describing. + * + * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, + * FUNC, FUNC_PROTO and VAR. + * "type" is a type_id referring to another type. + */ + union { + __u32 size; + __u32 type; + }; +}; + +#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f) +#define BTF_INFO_VLEN(info) ((info) & 0xffff) +#define BTF_INFO_KFLAG(info) ((info) >> 31) + +#define BTF_KIND_UNKN 0 /* Unknown */ +#define BTF_KIND_INT 1 /* Integer */ +#define BTF_KIND_PTR 2 /* Pointer */ +#define BTF_KIND_ARRAY 3 /* Array */ +#define BTF_KIND_STRUCT 4 /* Struct */ +#define BTF_KIND_UNION 5 /* Union */ +#define BTF_KIND_ENUM 6 /* Enumeration */ +#define BTF_KIND_FWD 7 /* Forward */ +#define BTF_KIND_TYPEDEF 8 /* Typedef */ +#define BTF_KIND_VOLATILE 9 /* Volatile */ +#define BTF_KIND_CONST 10 /* Const */ +#define BTF_KIND_RESTRICT 11 /* Restrict */ +#define BTF_KIND_FUNC 12 /* Function */ +#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */ +#define BTF_KIND_VAR 14 /* Variable */ +#define BTF_KIND_DATASEC 15 /* Section */ +#define BTF_KIND_MAX BTF_KIND_DATASEC +#define NR_BTF_KINDS (BTF_KIND_MAX + 1) + +/* For some specific BTF_KIND, "struct btf_type" is immediately + * followed by extra data. + */ + +/* BTF_KIND_INT is followed by a u32 and the following + * is the 32 bits arrangement: + */ +#define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f000000) >> 24) +#define BTF_INT_OFFSET(VAL) (((VAL) & 0x00ff0000) >> 16) +#define BTF_INT_BITS(VAL) ((VAL) & 0x000000ff) + +/* Attributes stored in the BTF_INT_ENCODING */ +#define BTF_INT_SIGNED (1 << 0) +#define BTF_INT_CHAR (1 << 1) +#define BTF_INT_BOOL (1 << 2) + +/* BTF_KIND_ENUM is followed by multiple "struct btf_enum". + * The exact number of btf_enum is stored in the vlen (of the + * info in "struct btf_type"). + */ +struct btf_enum { + __u32 name_off; + __s32 val; +}; + +/* BTF_KIND_ARRAY is followed by one "struct btf_array" */ +struct btf_array { + __u32 type; + __u32 index_type; + __u32 nelems; +}; + +/* BTF_KIND_STRUCT and BTF_KIND_UNION are followed + * by multiple "struct btf_member". The exact number + * of btf_member is stored in the vlen (of the info in + * "struct btf_type"). + */ +struct btf_member { + __u32 name_off; + __u32 type; + /* If the type info kind_flag is set, the btf_member offset + * contains both member bitfield size and bit offset. The + * bitfield size is set for bitfield members. If the type + * info kind_flag is not set, the offset contains only bit + * offset. + */ + __u32 offset; +}; + +/* If the struct/union type info kind_flag is set, the + * following two macros are used to access bitfield_size + * and bit_offset from btf_member.offset. + */ +#define BTF_MEMBER_BITFIELD_SIZE(val) ((val) >> 24) +#define BTF_MEMBER_BIT_OFFSET(val) ((val) & 0xffffff) + +/* BTF_KIND_FUNC_PROTO is followed by multiple "struct btf_param". + * The exact number of btf_param is stored in the vlen (of the + * info in "struct btf_type"). + */ +struct btf_param { + __u32 name_off; + __u32 type; +}; + +enum { + BTF_VAR_STATIC = 0, + BTF_VAR_GLOBAL_ALLOCATED, +}; + +/* BTF_KIND_VAR is followed by a single "struct btf_var" to describe + * additional information related to the variable such as its linkage. + */ +struct btf_var { + __u32 linkage; +}; + +/* BTF_KIND_DATASEC is followed by multiple "struct btf_var_secinfo" + * to describe all BTF_KIND_VAR types it contains along with it's + * in-section offset as well as size. + */ +struct btf_var_secinfo { + __u32 type; + __u32 offset; + __u32 size; +}; + +#endif /* _UAPI__LINUX_BTF_H__ */ diff --git a/src/contrib/libbpf/include/uapi/linux/if_link.h b/src/contrib/libbpf/include/uapi/linux/if_link.h new file mode 100644 index 0000000..8aec876 --- /dev/null +++ b/src/contrib/libbpf/include/uapi/linux/if_link.h @@ -0,0 +1,1033 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_IF_LINK_H +#define _UAPI_LINUX_IF_LINK_H + +#include <linux/types.h> +#include <linux/netlink.h> + +/* This struct should be in sync with struct rtnl_link_stats64 */ +struct rtnl_link_stats { + __u32 rx_packets; /* total packets received */ + __u32 tx_packets; /* total packets transmitted */ + __u32 rx_bytes; /* total bytes received */ + __u32 tx_bytes; /* total bytes transmitted */ + __u32 rx_errors; /* bad packets received */ + __u32 tx_errors; /* packet transmit problems */ + __u32 rx_dropped; /* no space in linux buffers */ + __u32 tx_dropped; /* no space available in linux */ + __u32 multicast; /* multicast packets received */ + __u32 collisions; + + /* detailed rx_errors: */ + __u32 rx_length_errors; + __u32 rx_over_errors; /* receiver ring buff overflow */ + __u32 rx_crc_errors; /* recved pkt with crc error */ + __u32 rx_frame_errors; /* recv'd frame alignment error */ + __u32 rx_fifo_errors; /* recv'r fifo overrun */ + __u32 rx_missed_errors; /* receiver missed packet */ + + /* detailed tx_errors */ + __u32 tx_aborted_errors; + __u32 tx_carrier_errors; + __u32 tx_fifo_errors; + __u32 tx_heartbeat_errors; + __u32 tx_window_errors; + + /* for cslip etc */ + __u32 rx_compressed; + __u32 tx_compressed; + + __u32 rx_nohandler; /* dropped, no handler found */ +}; + +/* The main device statistics structure */ +struct rtnl_link_stats64 { + __u64 rx_packets; /* total packets received */ + __u64 tx_packets; /* total packets transmitted */ + __u64 rx_bytes; /* total bytes received */ + __u64 tx_bytes; /* total bytes transmitted */ + __u64 rx_errors; /* bad packets received */ + __u64 tx_errors; /* packet transmit problems */ + __u64 rx_dropped; /* no space in linux buffers */ + __u64 tx_dropped; /* no space available in linux */ + __u64 multicast; /* multicast packets received */ + __u64 collisions; + + /* detailed rx_errors: */ + __u64 rx_length_errors; + __u64 rx_over_errors; /* receiver ring buff overflow */ + __u64 rx_crc_errors; /* recved pkt with crc error */ + __u64 rx_frame_errors; /* recv'd frame alignment error */ + __u64 rx_fifo_errors; /* recv'r fifo overrun */ + __u64 rx_missed_errors; /* receiver missed packet */ + + /* detailed tx_errors */ + __u64 tx_aborted_errors; + __u64 tx_carrier_errors; + __u64 tx_fifo_errors; + __u64 tx_heartbeat_errors; + __u64 tx_window_errors; + + /* for cslip etc */ + __u64 rx_compressed; + __u64 tx_compressed; + + __u64 rx_nohandler; /* dropped, no handler found */ +}; + +/* The struct should be in sync with struct ifmap */ +struct rtnl_link_ifmap { + __u64 mem_start; + __u64 mem_end; + __u64 base_addr; + __u16 irq; + __u8 dma; + __u8 port; +}; + +/* + * IFLA_AF_SPEC + * Contains nested attributes for address family specific attributes. + * Each address family may create a attribute with the address family + * number as type and create its own attribute structure in it. + * + * Example: + * [IFLA_AF_SPEC] = { + * [AF_INET] = { + * [IFLA_INET_CONF] = ..., + * }, + * [AF_INET6] = { + * [IFLA_INET6_FLAGS] = ..., + * [IFLA_INET6_CONF] = ..., + * } + * } + */ + +enum { + IFLA_UNSPEC, + IFLA_ADDRESS, + IFLA_BROADCAST, + IFLA_IFNAME, + IFLA_MTU, + IFLA_LINK, + IFLA_QDISC, + IFLA_STATS, + IFLA_COST, +#define IFLA_COST IFLA_COST + IFLA_PRIORITY, +#define IFLA_PRIORITY IFLA_PRIORITY + IFLA_MASTER, +#define IFLA_MASTER IFLA_MASTER + IFLA_WIRELESS, /* Wireless Extension event - see wireless.h */ +#define IFLA_WIRELESS IFLA_WIRELESS + IFLA_PROTINFO, /* Protocol specific information for a link */ +#define IFLA_PROTINFO IFLA_PROTINFO + IFLA_TXQLEN, +#define IFLA_TXQLEN IFLA_TXQLEN + IFLA_MAP, +#define IFLA_MAP IFLA_MAP + IFLA_WEIGHT, +#define IFLA_WEIGHT IFLA_WEIGHT + IFLA_OPERSTATE, + IFLA_LINKMODE, + IFLA_LINKINFO, +#define IFLA_LINKINFO IFLA_LINKINFO + IFLA_NET_NS_PID, + IFLA_IFALIAS, + IFLA_NUM_VF, /* Number of VFs if device is SR-IOV PF */ + IFLA_VFINFO_LIST, + IFLA_STATS64, + IFLA_VF_PORTS, + IFLA_PORT_SELF, + IFLA_AF_SPEC, + IFLA_GROUP, /* Group the device belongs to */ + IFLA_NET_NS_FD, + IFLA_EXT_MASK, /* Extended info mask, VFs, etc */ + IFLA_PROMISCUITY, /* Promiscuity count: > 0 means acts PROMISC */ +#define IFLA_PROMISCUITY IFLA_PROMISCUITY + IFLA_NUM_TX_QUEUES, + IFLA_NUM_RX_QUEUES, + IFLA_CARRIER, + IFLA_PHYS_PORT_ID, + IFLA_CARRIER_CHANGES, + IFLA_PHYS_SWITCH_ID, + IFLA_LINK_NETNSID, + IFLA_PHYS_PORT_NAME, + IFLA_PROTO_DOWN, + IFLA_GSO_MAX_SEGS, + IFLA_GSO_MAX_SIZE, + IFLA_PAD, + IFLA_XDP, + IFLA_EVENT, + IFLA_NEW_NETNSID, + IFLA_IF_NETNSID, + IFLA_TARGET_NETNSID = IFLA_IF_NETNSID, /* new alias */ + IFLA_CARRIER_UP_COUNT, + IFLA_CARRIER_DOWN_COUNT, + IFLA_NEW_IFINDEX, + IFLA_MIN_MTU, + IFLA_MAX_MTU, + IFLA_PROP_LIST, + IFLA_ALT_IFNAME, /* Alternative ifname */ + __IFLA_MAX +}; + + +#define IFLA_MAX (__IFLA_MAX - 1) + +/* backwards compatibility for userspace */ +#ifndef __KERNEL__ +#define IFLA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg)))) +#define IFLA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifinfomsg)) +#endif + +enum { + IFLA_INET_UNSPEC, + IFLA_INET_CONF, + __IFLA_INET_MAX, +}; + +#define IFLA_INET_MAX (__IFLA_INET_MAX - 1) + +/* ifi_flags. + + IFF_* flags. + + The only change is: + IFF_LOOPBACK, IFF_BROADCAST and IFF_POINTOPOINT are + more not changeable by user. They describe link media + characteristics and set by device driver. + + Comments: + - Combination IFF_BROADCAST|IFF_POINTOPOINT is invalid + - If neither of these three flags are set; + the interface is NBMA. + + - IFF_MULTICAST does not mean anything special: + multicasts can be used on all not-NBMA links. + IFF_MULTICAST means that this media uses special encapsulation + for multicast frames. Apparently, all IFF_POINTOPOINT and + IFF_BROADCAST devices are able to use multicasts too. + */ + +/* IFLA_LINK. + For usual devices it is equal ifi_index. + If it is a "virtual interface" (f.e. tunnel), ifi_link + can point to real physical interface (f.e. for bandwidth calculations), + or maybe 0, what means, that real media is unknown (usual + for IPIP tunnels, when route to endpoint is allowed to change) + */ + +/* Subtype attributes for IFLA_PROTINFO */ +enum { + IFLA_INET6_UNSPEC, + IFLA_INET6_FLAGS, /* link flags */ + IFLA_INET6_CONF, /* sysctl parameters */ + IFLA_INET6_STATS, /* statistics */ + IFLA_INET6_MCAST, /* MC things. What of them? */ + IFLA_INET6_CACHEINFO, /* time values and max reasm size */ + IFLA_INET6_ICMP6STATS, /* statistics (icmpv6) */ + IFLA_INET6_TOKEN, /* device token */ + IFLA_INET6_ADDR_GEN_MODE, /* implicit address generator mode */ + __IFLA_INET6_MAX +}; + +#define IFLA_INET6_MAX (__IFLA_INET6_MAX - 1) + +enum in6_addr_gen_mode { + IN6_ADDR_GEN_MODE_EUI64, + IN6_ADDR_GEN_MODE_NONE, + IN6_ADDR_GEN_MODE_STABLE_PRIVACY, + IN6_ADDR_GEN_MODE_RANDOM, +}; + +/* Bridge section */ + +enum { + IFLA_BR_UNSPEC, + IFLA_BR_FORWARD_DELAY, + IFLA_BR_HELLO_TIME, + IFLA_BR_MAX_AGE, + IFLA_BR_AGEING_TIME, + IFLA_BR_STP_STATE, + IFLA_BR_PRIORITY, + IFLA_BR_VLAN_FILTERING, + IFLA_BR_VLAN_PROTOCOL, + IFLA_BR_GROUP_FWD_MASK, + IFLA_BR_ROOT_ID, + IFLA_BR_BRIDGE_ID, + IFLA_BR_ROOT_PORT, + IFLA_BR_ROOT_PATH_COST, + IFLA_BR_TOPOLOGY_CHANGE, + IFLA_BR_TOPOLOGY_CHANGE_DETECTED, + IFLA_BR_HELLO_TIMER, + IFLA_BR_TCN_TIMER, + IFLA_BR_TOPOLOGY_CHANGE_TIMER, + IFLA_BR_GC_TIMER, + IFLA_BR_GROUP_ADDR, + IFLA_BR_FDB_FLUSH, + IFLA_BR_MCAST_ROUTER, + IFLA_BR_MCAST_SNOOPING, + IFLA_BR_MCAST_QUERY_USE_IFADDR, + IFLA_BR_MCAST_QUERIER, + IFLA_BR_MCAST_HASH_ELASTICITY, + IFLA_BR_MCAST_HASH_MAX, + IFLA_BR_MCAST_LAST_MEMBER_CNT, + IFLA_BR_MCAST_STARTUP_QUERY_CNT, + IFLA_BR_MCAST_LAST_MEMBER_INTVL, + IFLA_BR_MCAST_MEMBERSHIP_INTVL, + IFLA_BR_MCAST_QUERIER_INTVL, + IFLA_BR_MCAST_QUERY_INTVL, + IFLA_BR_MCAST_QUERY_RESPONSE_INTVL, + IFLA_BR_MCAST_STARTUP_QUERY_INTVL, + IFLA_BR_NF_CALL_IPTABLES, + IFLA_BR_NF_CALL_IP6TABLES, + IFLA_BR_NF_CALL_ARPTABLES, + IFLA_BR_VLAN_DEFAULT_PVID, + IFLA_BR_PAD, + IFLA_BR_VLAN_STATS_ENABLED, + IFLA_BR_MCAST_STATS_ENABLED, + IFLA_BR_MCAST_IGMP_VERSION, + IFLA_BR_MCAST_MLD_VERSION, + IFLA_BR_VLAN_STATS_PER_PORT, + IFLA_BR_MULTI_BOOLOPT, + __IFLA_BR_MAX, +}; + +#define IFLA_BR_MAX (__IFLA_BR_MAX - 1) + +struct ifla_bridge_id { + __u8 prio[2]; + __u8 addr[6]; /* ETH_ALEN */ +}; + +enum { + BRIDGE_MODE_UNSPEC, + BRIDGE_MODE_HAIRPIN, +}; + +enum { + IFLA_BRPORT_UNSPEC, + IFLA_BRPORT_STATE, /* Spanning tree state */ + IFLA_BRPORT_PRIORITY, /* " priority */ + IFLA_BRPORT_COST, /* " cost */ + IFLA_BRPORT_MODE, /* mode (hairpin) */ + IFLA_BRPORT_GUARD, /* bpdu guard */ + IFLA_BRPORT_PROTECT, /* root port protection */ + IFLA_BRPORT_FAST_LEAVE, /* multicast fast leave */ + IFLA_BRPORT_LEARNING, /* mac learning */ + IFLA_BRPORT_UNICAST_FLOOD, /* flood unicast traffic */ + IFLA_BRPORT_PROXYARP, /* proxy ARP */ + IFLA_BRPORT_LEARNING_SYNC, /* mac learning sync from device */ + IFLA_BRPORT_PROXYARP_WIFI, /* proxy ARP for Wi-Fi */ + IFLA_BRPORT_ROOT_ID, /* designated root */ + IFLA_BRPORT_BRIDGE_ID, /* designated bridge */ + IFLA_BRPORT_DESIGNATED_PORT, + IFLA_BRPORT_DESIGNATED_COST, + IFLA_BRPORT_ID, + IFLA_BRPORT_NO, + IFLA_BRPORT_TOPOLOGY_CHANGE_ACK, + IFLA_BRPORT_CONFIG_PENDING, + IFLA_BRPORT_MESSAGE_AGE_TIMER, + IFLA_BRPORT_FORWARD_DELAY_TIMER, + IFLA_BRPORT_HOLD_TIMER, + IFLA_BRPORT_FLUSH, + IFLA_BRPORT_MULTICAST_ROUTER, + IFLA_BRPORT_PAD, + IFLA_BRPORT_MCAST_FLOOD, + IFLA_BRPORT_MCAST_TO_UCAST, + IFLA_BRPORT_VLAN_TUNNEL, + IFLA_BRPORT_BCAST_FLOOD, + IFLA_BRPORT_GROUP_FWD_MASK, + IFLA_BRPORT_NEIGH_SUPPRESS, + IFLA_BRPORT_ISOLATED, + IFLA_BRPORT_BACKUP_PORT, + __IFLA_BRPORT_MAX +}; +#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) + +struct ifla_cacheinfo { + __u32 max_reasm_len; + __u32 tstamp; /* ipv6InterfaceTable updated timestamp */ + __u32 reachable_time; + __u32 retrans_time; +}; + +enum { + IFLA_INFO_UNSPEC, + IFLA_INFO_KIND, + IFLA_INFO_DATA, + IFLA_INFO_XSTATS, + IFLA_INFO_SLAVE_KIND, + IFLA_INFO_SLAVE_DATA, + __IFLA_INFO_MAX, +}; + +#define IFLA_INFO_MAX (__IFLA_INFO_MAX - 1) + +/* VLAN section */ + +enum { + IFLA_VLAN_UNSPEC, + IFLA_VLAN_ID, + IFLA_VLAN_FLAGS, + IFLA_VLAN_EGRESS_QOS, + IFLA_VLAN_INGRESS_QOS, + IFLA_VLAN_PROTOCOL, + __IFLA_VLAN_MAX, +}; + +#define IFLA_VLAN_MAX (__IFLA_VLAN_MAX - 1) + +struct ifla_vlan_flags { + __u32 flags; + __u32 mask; +}; + +enum { + IFLA_VLAN_QOS_UNSPEC, + IFLA_VLAN_QOS_MAPPING, + __IFLA_VLAN_QOS_MAX +}; + +#define IFLA_VLAN_QOS_MAX (__IFLA_VLAN_QOS_MAX - 1) + +struct ifla_vlan_qos_mapping { + __u32 from; + __u32 to; +}; + +/* MACVLAN section */ +enum { + IFLA_MACVLAN_UNSPEC, + IFLA_MACVLAN_MODE, + IFLA_MACVLAN_FLAGS, + IFLA_MACVLAN_MACADDR_MODE, + IFLA_MACVLAN_MACADDR, + IFLA_MACVLAN_MACADDR_DATA, + IFLA_MACVLAN_MACADDR_COUNT, + __IFLA_MACVLAN_MAX, +}; + +#define IFLA_MACVLAN_MAX (__IFLA_MACVLAN_MAX - 1) + +enum macvlan_mode { + MACVLAN_MODE_PRIVATE = 1, /* don't talk to other macvlans */ + MACVLAN_MODE_VEPA = 2, /* talk to other ports through ext bridge */ + MACVLAN_MODE_BRIDGE = 4, /* talk to bridge ports directly */ + MACVLAN_MODE_PASSTHRU = 8,/* take over the underlying device */ + MACVLAN_MODE_SOURCE = 16,/* use source MAC address list to assign */ +}; + +enum macvlan_macaddr_mode { + MACVLAN_MACADDR_ADD, + MACVLAN_MACADDR_DEL, + MACVLAN_MACADDR_FLUSH, + MACVLAN_MACADDR_SET, +}; + +#define MACVLAN_FLAG_NOPROMISC 1 + +/* VRF section */ +enum { + IFLA_VRF_UNSPEC, + IFLA_VRF_TABLE, + __IFLA_VRF_MAX +}; + +#define IFLA_VRF_MAX (__IFLA_VRF_MAX - 1) + +enum { + IFLA_VRF_PORT_UNSPEC, + IFLA_VRF_PORT_TABLE, + __IFLA_VRF_PORT_MAX +}; + +#define IFLA_VRF_PORT_MAX (__IFLA_VRF_PORT_MAX - 1) + +/* MACSEC section */ +enum { + IFLA_MACSEC_UNSPEC, + IFLA_MACSEC_SCI, + IFLA_MACSEC_PORT, + IFLA_MACSEC_ICV_LEN, + IFLA_MACSEC_CIPHER_SUITE, + IFLA_MACSEC_WINDOW, + IFLA_MACSEC_ENCODING_SA, + IFLA_MACSEC_ENCRYPT, + IFLA_MACSEC_PROTECT, + IFLA_MACSEC_INC_SCI, + IFLA_MACSEC_ES, + IFLA_MACSEC_SCB, + IFLA_MACSEC_REPLAY_PROTECT, + IFLA_MACSEC_VALIDATION, + IFLA_MACSEC_PAD, + __IFLA_MACSEC_MAX, +}; + +#define IFLA_MACSEC_MAX (__IFLA_MACSEC_MAX - 1) + +/* XFRM section */ +enum { + IFLA_XFRM_UNSPEC, + IFLA_XFRM_LINK, + IFLA_XFRM_IF_ID, + __IFLA_XFRM_MAX +}; + +#define IFLA_XFRM_MAX (__IFLA_XFRM_MAX - 1) + +enum macsec_validation_type { + MACSEC_VALIDATE_DISABLED = 0, + MACSEC_VALIDATE_CHECK = 1, + MACSEC_VALIDATE_STRICT = 2, + __MACSEC_VALIDATE_END, + MACSEC_VALIDATE_MAX = __MACSEC_VALIDATE_END - 1, +}; + +/* IPVLAN section */ +enum { + IFLA_IPVLAN_UNSPEC, + IFLA_IPVLAN_MODE, + IFLA_IPVLAN_FLAGS, + __IFLA_IPVLAN_MAX +}; + +#define IFLA_IPVLAN_MAX (__IFLA_IPVLAN_MAX - 1) + +enum ipvlan_mode { + IPVLAN_MODE_L2 = 0, + IPVLAN_MODE_L3, + IPVLAN_MODE_L3S, + IPVLAN_MODE_MAX +}; + +#define IPVLAN_F_PRIVATE 0x01 +#define IPVLAN_F_VEPA 0x02 + +/* VXLAN section */ +enum { + IFLA_VXLAN_UNSPEC, + IFLA_VXLAN_ID, + IFLA_VXLAN_GROUP, /* group or remote address */ + IFLA_VXLAN_LINK, + IFLA_VXLAN_LOCAL, + IFLA_VXLAN_TTL, + IFLA_VXLAN_TOS, + IFLA_VXLAN_LEARNING, + IFLA_VXLAN_AGEING, + IFLA_VXLAN_LIMIT, + IFLA_VXLAN_PORT_RANGE, /* source port */ + IFLA_VXLAN_PROXY, + IFLA_VXLAN_RSC, + IFLA_VXLAN_L2MISS, + IFLA_VXLAN_L3MISS, + IFLA_VXLAN_PORT, /* destination port */ + IFLA_VXLAN_GROUP6, + IFLA_VXLAN_LOCAL6, + IFLA_VXLAN_UDP_CSUM, + IFLA_VXLAN_UDP_ZERO_CSUM6_TX, + IFLA_VXLAN_UDP_ZERO_CSUM6_RX, + IFLA_VXLAN_REMCSUM_TX, + IFLA_VXLAN_REMCSUM_RX, + IFLA_VXLAN_GBP, + IFLA_VXLAN_REMCSUM_NOPARTIAL, + IFLA_VXLAN_COLLECT_METADATA, + IFLA_VXLAN_LABEL, + IFLA_VXLAN_GPE, + IFLA_VXLAN_TTL_INHERIT, + IFLA_VXLAN_DF, + __IFLA_VXLAN_MAX +}; +#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) + +struct ifla_vxlan_port_range { + __be16 low; + __be16 high; +}; + +enum ifla_vxlan_df { + VXLAN_DF_UNSET = 0, + VXLAN_DF_SET, + VXLAN_DF_INHERIT, + __VXLAN_DF_END, + VXLAN_DF_MAX = __VXLAN_DF_END - 1, +}; + +/* GENEVE section */ +enum { + IFLA_GENEVE_UNSPEC, + IFLA_GENEVE_ID, + IFLA_GENEVE_REMOTE, + IFLA_GENEVE_TTL, + IFLA_GENEVE_TOS, + IFLA_GENEVE_PORT, /* destination port */ + IFLA_GENEVE_COLLECT_METADATA, + IFLA_GENEVE_REMOTE6, + IFLA_GENEVE_UDP_CSUM, + IFLA_GENEVE_UDP_ZERO_CSUM6_TX, + IFLA_GENEVE_UDP_ZERO_CSUM6_RX, + IFLA_GENEVE_LABEL, + IFLA_GENEVE_TTL_INHERIT, + IFLA_GENEVE_DF, + __IFLA_GENEVE_MAX +}; +#define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) + +enum ifla_geneve_df { + GENEVE_DF_UNSET = 0, + GENEVE_DF_SET, + GENEVE_DF_INHERIT, + __GENEVE_DF_END, + GENEVE_DF_MAX = __GENEVE_DF_END - 1, +}; + +/* PPP section */ +enum { + IFLA_PPP_UNSPEC, + IFLA_PPP_DEV_FD, + __IFLA_PPP_MAX +}; +#define IFLA_PPP_MAX (__IFLA_PPP_MAX - 1) + +/* GTP section */ + +enum ifla_gtp_role { + GTP_ROLE_GGSN = 0, + GTP_ROLE_SGSN, +}; + +enum { + IFLA_GTP_UNSPEC, + IFLA_GTP_FD0, + IFLA_GTP_FD1, + IFLA_GTP_PDP_HASHSIZE, + IFLA_GTP_ROLE, + __IFLA_GTP_MAX, +}; +#define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1) + +/* Bonding section */ + +enum { + IFLA_BOND_UNSPEC, + IFLA_BOND_MODE, + IFLA_BOND_ACTIVE_SLAVE, + IFLA_BOND_MIIMON, + IFLA_BOND_UPDELAY, + IFLA_BOND_DOWNDELAY, + IFLA_BOND_USE_CARRIER, + IFLA_BOND_ARP_INTERVAL, + IFLA_BOND_ARP_IP_TARGET, + IFLA_BOND_ARP_VALIDATE, + IFLA_BOND_ARP_ALL_TARGETS, + IFLA_BOND_PRIMARY, + IFLA_BOND_PRIMARY_RESELECT, + IFLA_BOND_FAIL_OVER_MAC, + IFLA_BOND_XMIT_HASH_POLICY, + IFLA_BOND_RESEND_IGMP, + IFLA_BOND_NUM_PEER_NOTIF, + IFLA_BOND_ALL_SLAVES_ACTIVE, + IFLA_BOND_MIN_LINKS, + IFLA_BOND_LP_INTERVAL, + IFLA_BOND_PACKETS_PER_SLAVE, + IFLA_BOND_AD_LACP_RATE, + IFLA_BOND_AD_SELECT, + IFLA_BOND_AD_INFO, + IFLA_BOND_AD_ACTOR_SYS_PRIO, + IFLA_BOND_AD_USER_PORT_KEY, + IFLA_BOND_AD_ACTOR_SYSTEM, + IFLA_BOND_TLB_DYNAMIC_LB, + IFLA_BOND_PEER_NOTIF_DELAY, + __IFLA_BOND_MAX, +}; + +#define IFLA_BOND_MAX (__IFLA_BOND_MAX - 1) + +enum { + IFLA_BOND_AD_INFO_UNSPEC, + IFLA_BOND_AD_INFO_AGGREGATOR, + IFLA_BOND_AD_INFO_NUM_PORTS, + IFLA_BOND_AD_INFO_ACTOR_KEY, + IFLA_BOND_AD_INFO_PARTNER_KEY, + IFLA_BOND_AD_INFO_PARTNER_MAC, + __IFLA_BOND_AD_INFO_MAX, +}; + +#define IFLA_BOND_AD_INFO_MAX (__IFLA_BOND_AD_INFO_MAX - 1) + +enum { + IFLA_BOND_SLAVE_UNSPEC, + IFLA_BOND_SLAVE_STATE, + IFLA_BOND_SLAVE_MII_STATUS, + IFLA_BOND_SLAVE_LINK_FAILURE_COUNT, + IFLA_BOND_SLAVE_PERM_HWADDR, + IFLA_BOND_SLAVE_QUEUE_ID, + IFLA_BOND_SLAVE_AD_AGGREGATOR_ID, + IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, + IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, + __IFLA_BOND_SLAVE_MAX, +}; + +#define IFLA_BOND_SLAVE_MAX (__IFLA_BOND_SLAVE_MAX - 1) + +/* SR-IOV virtual function management section */ + +enum { + IFLA_VF_INFO_UNSPEC, + IFLA_VF_INFO, + __IFLA_VF_INFO_MAX, +}; + +#define IFLA_VF_INFO_MAX (__IFLA_VF_INFO_MAX - 1) + +enum { + IFLA_VF_UNSPEC, + IFLA_VF_MAC, /* Hardware queue specific attributes */ + IFLA_VF_VLAN, /* VLAN ID and QoS */ + IFLA_VF_TX_RATE, /* Max TX Bandwidth Allocation */ + IFLA_VF_SPOOFCHK, /* Spoof Checking on/off switch */ + IFLA_VF_LINK_STATE, /* link state enable/disable/auto switch */ + IFLA_VF_RATE, /* Min and Max TX Bandwidth Allocation */ + IFLA_VF_RSS_QUERY_EN, /* RSS Redirection Table and Hash Key query + * on/off switch + */ + IFLA_VF_STATS, /* network device statistics */ + IFLA_VF_TRUST, /* Trust VF */ + IFLA_VF_IB_NODE_GUID, /* VF Infiniband node GUID */ + IFLA_VF_IB_PORT_GUID, /* VF Infiniband port GUID */ + IFLA_VF_VLAN_LIST, /* nested list of vlans, option for QinQ */ + IFLA_VF_BROADCAST, /* VF broadcast */ + __IFLA_VF_MAX, +}; + +#define IFLA_VF_MAX (__IFLA_VF_MAX - 1) + +struct ifla_vf_mac { + __u32 vf; + __u8 mac[32]; /* MAX_ADDR_LEN */ +}; + +struct ifla_vf_broadcast { + __u8 broadcast[32]; +}; + +struct ifla_vf_vlan { + __u32 vf; + __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */ + __u32 qos; +}; + +enum { + IFLA_VF_VLAN_INFO_UNSPEC, + IFLA_VF_VLAN_INFO, /* VLAN ID, QoS and VLAN protocol */ + __IFLA_VF_VLAN_INFO_MAX, +}; + +#define IFLA_VF_VLAN_INFO_MAX (__IFLA_VF_VLAN_INFO_MAX - 1) +#define MAX_VLAN_LIST_LEN 1 + +struct ifla_vf_vlan_info { + __u32 vf; + __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */ + __u32 qos; + __be16 vlan_proto; /* VLAN protocol either 802.1Q or 802.1ad */ +}; + +struct ifla_vf_tx_rate { + __u32 vf; + __u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */ +}; + +struct ifla_vf_rate { + __u32 vf; + __u32 min_tx_rate; /* Min Bandwidth in Mbps */ + __u32 max_tx_rate; /* Max Bandwidth in Mbps */ +}; + +struct ifla_vf_spoofchk { + __u32 vf; + __u32 setting; +}; + +struct ifla_vf_guid { + __u32 vf; + __u64 guid; +}; + +enum { + IFLA_VF_LINK_STATE_AUTO, /* link state of the uplink */ + IFLA_VF_LINK_STATE_ENABLE, /* link always up */ + IFLA_VF_LINK_STATE_DISABLE, /* link always down */ + __IFLA_VF_LINK_STATE_MAX, +}; + +struct ifla_vf_link_state { + __u32 vf; + __u32 link_state; +}; + +struct ifla_vf_rss_query_en { + __u32 vf; + __u32 setting; +}; + +enum { + IFLA_VF_STATS_RX_PACKETS, + IFLA_VF_STATS_TX_PACKETS, + IFLA_VF_STATS_RX_BYTES, + IFLA_VF_STATS_TX_BYTES, + IFLA_VF_STATS_BROADCAST, + IFLA_VF_STATS_MULTICAST, + IFLA_VF_STATS_PAD, + IFLA_VF_STATS_RX_DROPPED, + IFLA_VF_STATS_TX_DROPPED, + __IFLA_VF_STATS_MAX, +}; + +#define IFLA_VF_STATS_MAX (__IFLA_VF_STATS_MAX - 1) + +struct ifla_vf_trust { + __u32 vf; + __u32 setting; +}; + +/* VF ports management section + * + * Nested layout of set/get msg is: + * + * [IFLA_NUM_VF] + * [IFLA_VF_PORTS] + * [IFLA_VF_PORT] + * [IFLA_PORT_*], ... + * [IFLA_VF_PORT] + * [IFLA_PORT_*], ... + * ... + * [IFLA_PORT_SELF] + * [IFLA_PORT_*], ... + */ + +enum { + IFLA_VF_PORT_UNSPEC, + IFLA_VF_PORT, /* nest */ + __IFLA_VF_PORT_MAX, +}; + +#define IFLA_VF_PORT_MAX (__IFLA_VF_PORT_MAX - 1) + +enum { + IFLA_PORT_UNSPEC, + IFLA_PORT_VF, /* __u32 */ + IFLA_PORT_PROFILE, /* string */ + IFLA_PORT_VSI_TYPE, /* 802.1Qbg (pre-)standard VDP */ + IFLA_PORT_INSTANCE_UUID, /* binary UUID */ + IFLA_PORT_HOST_UUID, /* binary UUID */ + IFLA_PORT_REQUEST, /* __u8 */ + IFLA_PORT_RESPONSE, /* __u16, output only */ + __IFLA_PORT_MAX, +}; + +#define IFLA_PORT_MAX (__IFLA_PORT_MAX - 1) + +#define PORT_PROFILE_MAX 40 +#define PORT_UUID_MAX 16 +#define PORT_SELF_VF -1 + +enum { + PORT_REQUEST_PREASSOCIATE = 0, + PORT_REQUEST_PREASSOCIATE_RR, + PORT_REQUEST_ASSOCIATE, + PORT_REQUEST_DISASSOCIATE, +}; + +enum { + PORT_VDP_RESPONSE_SUCCESS = 0, + PORT_VDP_RESPONSE_INVALID_FORMAT, + PORT_VDP_RESPONSE_INSUFFICIENT_RESOURCES, + PORT_VDP_RESPONSE_UNUSED_VTID, + PORT_VDP_RESPONSE_VTID_VIOLATION, + PORT_VDP_RESPONSE_VTID_VERSION_VIOALTION, + PORT_VDP_RESPONSE_OUT_OF_SYNC, + /* 0x08-0xFF reserved for future VDP use */ + PORT_PROFILE_RESPONSE_SUCCESS = 0x100, + PORT_PROFILE_RESPONSE_INPROGRESS, + PORT_PROFILE_RESPONSE_INVALID, + PORT_PROFILE_RESPONSE_BADSTATE, + PORT_PROFILE_RESPONSE_INSUFFICIENT_RESOURCES, + PORT_PROFILE_RESPONSE_ERROR, +}; + +struct ifla_port_vsi { + __u8 vsi_mgr_id; + __u8 vsi_type_id[3]; + __u8 vsi_type_version; + __u8 pad[3]; +}; + + +/* IPoIB section */ + +enum { + IFLA_IPOIB_UNSPEC, + IFLA_IPOIB_PKEY, + IFLA_IPOIB_MODE, + IFLA_IPOIB_UMCAST, + __IFLA_IPOIB_MAX +}; + +enum { + IPOIB_MODE_DATAGRAM = 0, /* using unreliable datagram QPs */ + IPOIB_MODE_CONNECTED = 1, /* using connected QPs */ +}; + +#define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1) + + +/* HSR section */ + +enum { + IFLA_HSR_UNSPEC, + IFLA_HSR_SLAVE1, + IFLA_HSR_SLAVE2, + IFLA_HSR_MULTICAST_SPEC, /* Last byte of supervision addr */ + IFLA_HSR_SUPERVISION_ADDR, /* Supervision frame multicast addr */ + IFLA_HSR_SEQ_NR, + IFLA_HSR_VERSION, /* HSR version */ + __IFLA_HSR_MAX, +}; + +#define IFLA_HSR_MAX (__IFLA_HSR_MAX - 1) + +/* STATS section */ + +struct if_stats_msg { + __u8 family; + __u8 pad1; + __u16 pad2; + __u32 ifindex; + __u32 filter_mask; +}; + +/* A stats attribute can be netdev specific or a global stat. + * For netdev stats, lets use the prefix IFLA_STATS_LINK_* + */ +enum { + IFLA_STATS_UNSPEC, /* also used as 64bit pad attribute */ + IFLA_STATS_LINK_64, + IFLA_STATS_LINK_XSTATS, + IFLA_STATS_LINK_XSTATS_SLAVE, + IFLA_STATS_LINK_OFFLOAD_XSTATS, + IFLA_STATS_AF_SPEC, + __IFLA_STATS_MAX, +}; + +#define IFLA_STATS_MAX (__IFLA_STATS_MAX - 1) + +#define IFLA_STATS_FILTER_BIT(ATTR) (1 << (ATTR - 1)) + +/* These are embedded into IFLA_STATS_LINK_XSTATS: + * [IFLA_STATS_LINK_XSTATS] + * -> [LINK_XSTATS_TYPE_xxx] + * -> [rtnl link type specific attributes] + */ +enum { + LINK_XSTATS_TYPE_UNSPEC, + LINK_XSTATS_TYPE_BRIDGE, + LINK_XSTATS_TYPE_BOND, + __LINK_XSTATS_TYPE_MAX +}; +#define LINK_XSTATS_TYPE_MAX (__LINK_XSTATS_TYPE_MAX - 1) + +/* These are stats embedded into IFLA_STATS_LINK_OFFLOAD_XSTATS */ +enum { + IFLA_OFFLOAD_XSTATS_UNSPEC, + IFLA_OFFLOAD_XSTATS_CPU_HIT, /* struct rtnl_link_stats64 */ + __IFLA_OFFLOAD_XSTATS_MAX +}; +#define IFLA_OFFLOAD_XSTATS_MAX (__IFLA_OFFLOAD_XSTATS_MAX - 1) + +/* XDP section */ + +#define XDP_FLAGS_UPDATE_IF_NOEXIST (1U << 0) +#define XDP_FLAGS_SKB_MODE (1U << 1) +#define XDP_FLAGS_DRV_MODE (1U << 2) +#define XDP_FLAGS_HW_MODE (1U << 3) +#define XDP_FLAGS_MODES (XDP_FLAGS_SKB_MODE | \ + XDP_FLAGS_DRV_MODE | \ + XDP_FLAGS_HW_MODE) +#define XDP_FLAGS_MASK (XDP_FLAGS_UPDATE_IF_NOEXIST | \ + XDP_FLAGS_MODES) + +/* These are stored into IFLA_XDP_ATTACHED on dump. */ +enum { + XDP_ATTACHED_NONE = 0, + XDP_ATTACHED_DRV, + XDP_ATTACHED_SKB, + XDP_ATTACHED_HW, + XDP_ATTACHED_MULTI, +}; + +enum { + IFLA_XDP_UNSPEC, + IFLA_XDP_FD, + IFLA_XDP_ATTACHED, + IFLA_XDP_FLAGS, + IFLA_XDP_PROG_ID, + IFLA_XDP_DRV_PROG_ID, + IFLA_XDP_SKB_PROG_ID, + IFLA_XDP_HW_PROG_ID, + __IFLA_XDP_MAX, +}; + +#define IFLA_XDP_MAX (__IFLA_XDP_MAX - 1) + +enum { + IFLA_EVENT_NONE, + IFLA_EVENT_REBOOT, /* internal reset / reboot */ + IFLA_EVENT_FEATURES, /* change in offload features */ + IFLA_EVENT_BONDING_FAILOVER, /* change in active slave */ + IFLA_EVENT_NOTIFY_PEERS, /* re-sent grat. arp/ndisc */ + IFLA_EVENT_IGMP_RESEND, /* re-sent IGMP JOIN */ + IFLA_EVENT_BONDING_OPTIONS, /* change in bonding options */ +}; + +/* tun section */ + +enum { + IFLA_TUN_UNSPEC, + IFLA_TUN_OWNER, + IFLA_TUN_GROUP, + IFLA_TUN_TYPE, + IFLA_TUN_PI, + IFLA_TUN_VNET_HDR, + IFLA_TUN_PERSIST, + IFLA_TUN_MULTI_QUEUE, + IFLA_TUN_NUM_QUEUES, + IFLA_TUN_NUM_DISABLED_QUEUES, + __IFLA_TUN_MAX, +}; + +#define IFLA_TUN_MAX (__IFLA_TUN_MAX - 1) + +/* rmnet section */ + +#define RMNET_FLAGS_INGRESS_DEAGGREGATION (1U << 0) +#define RMNET_FLAGS_INGRESS_MAP_COMMANDS (1U << 1) +#define RMNET_FLAGS_INGRESS_MAP_CKSUMV4 (1U << 2) +#define RMNET_FLAGS_EGRESS_MAP_CKSUMV4 (1U << 3) + +enum { + IFLA_RMNET_UNSPEC, + IFLA_RMNET_MUX_ID, + IFLA_RMNET_FLAGS, + __IFLA_RMNET_MAX, +}; + +#define IFLA_RMNET_MAX (__IFLA_RMNET_MAX - 1) + +struct ifla_rmnet_flags { + __u32 flags; + __u32 mask; +}; + +#endif /* _UAPI_LINUX_IF_LINK_H */ diff --git a/src/contrib/libbpf/include/uapi/linux/if_xdp.h b/src/contrib/libbpf/include/uapi/linux/if_xdp.h new file mode 100644 index 0000000..be328c5 --- /dev/null +++ b/src/contrib/libbpf/include/uapi/linux/if_xdp.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * if_xdp: XDP socket user-space interface + * Copyright(c) 2018 Intel Corporation. + * + * Author(s): Björn Töpel <bjorn.topel@intel.com> + * Magnus Karlsson <magnus.karlsson@intel.com> + */ + +#ifndef _LINUX_IF_XDP_H +#define _LINUX_IF_XDP_H + +#include <linux/types.h> + +/* Options for the sxdp_flags field */ +#define XDP_SHARED_UMEM (1 << 0) +#define XDP_COPY (1 << 1) /* Force copy-mode */ +#define XDP_ZEROCOPY (1 << 2) /* Force zero-copy mode */ +/* If this option is set, the driver might go sleep and in that case + * the XDP_RING_NEED_WAKEUP flag in the fill and/or Tx rings will be + * set. If it is set, the application need to explicitly wake up the + * driver with a poll() (Rx and Tx) or sendto() (Tx only). If you are + * running the driver and the application on the same core, you should + * use this option so that the kernel will yield to the user space + * application. + */ +#define XDP_USE_NEED_WAKEUP (1 << 3) + +/* Flags for xsk_umem_config flags */ +#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0) + +struct sockaddr_xdp { + __u16 sxdp_family; + __u16 sxdp_flags; + __u32 sxdp_ifindex; + __u32 sxdp_queue_id; + __u32 sxdp_shared_umem_fd; +}; + +/* XDP_RING flags */ +#define XDP_RING_NEED_WAKEUP (1 << 0) + +struct xdp_ring_offset { + __u64 producer; + __u64 consumer; + __u64 desc; + __u64 flags; +}; + +struct xdp_mmap_offsets { + struct xdp_ring_offset rx; + struct xdp_ring_offset tx; + struct xdp_ring_offset fr; /* Fill */ + struct xdp_ring_offset cr; /* Completion */ +}; + +/* XDP socket options */ +#define XDP_MMAP_OFFSETS 1 +#define XDP_RX_RING 2 +#define XDP_TX_RING 3 +#define XDP_UMEM_REG 4 +#define XDP_UMEM_FILL_RING 5 +#define XDP_UMEM_COMPLETION_RING 6 +#define XDP_STATISTICS 7 +#define XDP_OPTIONS 8 + +struct xdp_umem_reg { + __u64 addr; /* Start of packet data area */ + __u64 len; /* Length of packet data area */ + __u32 chunk_size; + __u32 headroom; + __u32 flags; +}; + +struct xdp_statistics { + __u64 rx_dropped; /* Dropped for reasons other than invalid desc */ + __u64 rx_invalid_descs; /* Dropped due to invalid descriptor */ + __u64 tx_invalid_descs; /* Dropped due to invalid descriptor */ +}; + +struct xdp_options { + __u32 flags; +}; + +/* Flags for the flags field of struct xdp_options */ +#define XDP_OPTIONS_ZEROCOPY (1 << 0) + +/* Pgoff for mmaping the rings */ +#define XDP_PGOFF_RX_RING 0 +#define XDP_PGOFF_TX_RING 0x80000000 +#define XDP_UMEM_PGOFF_FILL_RING 0x100000000ULL +#define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000ULL + +/* Masks for unaligned chunks mode */ +#define XSK_UNALIGNED_BUF_OFFSET_SHIFT 48 +#define XSK_UNALIGNED_BUF_ADDR_MASK \ + ((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1) + +/* Rx/Tx descriptor */ +struct xdp_desc { + __u64 addr; + __u32 len; + __u32 options; +}; + +/* UMEM descriptor is __u64 */ + +#endif /* _LINUX_IF_XDP_H */ diff --git a/src/contrib/libbpf/include/uapi/linux/netlink.h b/src/contrib/libbpf/include/uapi/linux/netlink.h new file mode 100644 index 0000000..0a4d733 --- /dev/null +++ b/src/contrib/libbpf/include/uapi/linux/netlink.h @@ -0,0 +1,252 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_NETLINK_H +#define _UAPI__LINUX_NETLINK_H + +#include <linux/kernel.h> +#include <linux/socket.h> /* for __kernel_sa_family_t */ +#include <linux/types.h> + +#define NETLINK_ROUTE 0 /* Routing/device hook */ +#define NETLINK_UNUSED 1 /* Unused number */ +#define NETLINK_USERSOCK 2 /* Reserved for user mode socket protocols */ +#define NETLINK_FIREWALL 3 /* Unused number, formerly ip_queue */ +#define NETLINK_SOCK_DIAG 4 /* socket monitoring */ +#define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */ +#define NETLINK_XFRM 6 /* ipsec */ +#define NETLINK_SELINUX 7 /* SELinux event notifications */ +#define NETLINK_ISCSI 8 /* Open-iSCSI */ +#define NETLINK_AUDIT 9 /* auditing */ +#define NETLINK_FIB_LOOKUP 10 +#define NETLINK_CONNECTOR 11 +#define NETLINK_NETFILTER 12 /* netfilter subsystem */ +#define NETLINK_IP6_FW 13 +#define NETLINK_DNRTMSG 14 /* DECnet routing messages */ +#define NETLINK_KOBJECT_UEVENT 15 /* Kernel messages to userspace */ +#define NETLINK_GENERIC 16 +/* leave room for NETLINK_DM (DM Events) */ +#define NETLINK_SCSITRANSPORT 18 /* SCSI Transports */ +#define NETLINK_ECRYPTFS 19 +#define NETLINK_RDMA 20 +#define NETLINK_CRYPTO 21 /* Crypto layer */ +#define NETLINK_SMC 22 /* SMC monitoring */ + +#define NETLINK_INET_DIAG NETLINK_SOCK_DIAG + +#define MAX_LINKS 32 + +struct sockaddr_nl { + __kernel_sa_family_t nl_family; /* AF_NETLINK */ + unsigned short nl_pad; /* zero */ + __u32 nl_pid; /* port ID */ + __u32 nl_groups; /* multicast groups mask */ +}; + +struct nlmsghdr { + __u32 nlmsg_len; /* Length of message including header */ + __u16 nlmsg_type; /* Message content */ + __u16 nlmsg_flags; /* Additional flags */ + __u32 nlmsg_seq; /* Sequence number */ + __u32 nlmsg_pid; /* Sending process port ID */ +}; + +/* Flags values */ + +#define NLM_F_REQUEST 0x01 /* It is request message. */ +#define NLM_F_MULTI 0x02 /* Multipart message, terminated by NLMSG_DONE */ +#define NLM_F_ACK 0x04 /* Reply with ack, with zero or error code */ +#define NLM_F_ECHO 0x08 /* Echo this request */ +#define NLM_F_DUMP_INTR 0x10 /* Dump was inconsistent due to sequence change */ +#define NLM_F_DUMP_FILTERED 0x20 /* Dump was filtered as requested */ + +/* Modifiers to GET request */ +#define NLM_F_ROOT 0x100 /* specify tree root */ +#define NLM_F_MATCH 0x200 /* return all matching */ +#define NLM_F_ATOMIC 0x400 /* atomic GET */ +#define NLM_F_DUMP (NLM_F_ROOT|NLM_F_MATCH) + +/* Modifiers to NEW request */ +#define NLM_F_REPLACE 0x100 /* Override existing */ +#define NLM_F_EXCL 0x200 /* Do not touch, if it exists */ +#define NLM_F_CREATE 0x400 /* Create, if it does not exist */ +#define NLM_F_APPEND 0x800 /* Add to end of list */ + +/* Modifiers to DELETE request */ +#define NLM_F_NONREC 0x100 /* Do not delete recursively */ + +/* Flags for ACK message */ +#define NLM_F_CAPPED 0x100 /* request was capped */ +#define NLM_F_ACK_TLVS 0x200 /* extended ACK TVLs were included */ + +/* + 4.4BSD ADD NLM_F_CREATE|NLM_F_EXCL + 4.4BSD CHANGE NLM_F_REPLACE + + True CHANGE NLM_F_CREATE|NLM_F_REPLACE + Append NLM_F_CREATE + Check NLM_F_EXCL + */ + +#define NLMSG_ALIGNTO 4U +#define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) ) +#define NLMSG_HDRLEN ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr))) +#define NLMSG_LENGTH(len) ((len) + NLMSG_HDRLEN) +#define NLMSG_SPACE(len) NLMSG_ALIGN(NLMSG_LENGTH(len)) +#define NLMSG_DATA(nlh) ((void*)(((char*)nlh) + NLMSG_LENGTH(0))) +#define NLMSG_NEXT(nlh,len) ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \ + (struct nlmsghdr*)(((char*)(nlh)) + NLMSG_ALIGN((nlh)->nlmsg_len))) +#define NLMSG_OK(nlh,len) ((len) >= (int)sizeof(struct nlmsghdr) && \ + (nlh)->nlmsg_len >= sizeof(struct nlmsghdr) && \ + (nlh)->nlmsg_len <= (len)) +#define NLMSG_PAYLOAD(nlh,len) ((nlh)->nlmsg_len - NLMSG_SPACE((len))) + +#define NLMSG_NOOP 0x1 /* Nothing. */ +#define NLMSG_ERROR 0x2 /* Error */ +#define NLMSG_DONE 0x3 /* End of a dump */ +#define NLMSG_OVERRUN 0x4 /* Data lost */ + +#define NLMSG_MIN_TYPE 0x10 /* < 0x10: reserved control messages */ + +struct nlmsgerr { + int error; + struct nlmsghdr msg; + /* + * followed by the message contents unless NETLINK_CAP_ACK was set + * or the ACK indicates success (error == 0) + * message length is aligned with NLMSG_ALIGN() + */ + /* + * followed by TLVs defined in enum nlmsgerr_attrs + * if NETLINK_EXT_ACK was set + */ +}; + +/** + * enum nlmsgerr_attrs - nlmsgerr attributes + * @NLMSGERR_ATTR_UNUSED: unused + * @NLMSGERR_ATTR_MSG: error message string (string) + * @NLMSGERR_ATTR_OFFS: offset of the invalid attribute in the original + * message, counting from the beginning of the header (u32) + * @NLMSGERR_ATTR_COOKIE: arbitrary subsystem specific cookie to + * be used - in the success case - to identify a created + * object or operation or similar (binary) + * @__NLMSGERR_ATTR_MAX: number of attributes + * @NLMSGERR_ATTR_MAX: highest attribute number + */ +enum nlmsgerr_attrs { + NLMSGERR_ATTR_UNUSED, + NLMSGERR_ATTR_MSG, + NLMSGERR_ATTR_OFFS, + NLMSGERR_ATTR_COOKIE, + + __NLMSGERR_ATTR_MAX, + NLMSGERR_ATTR_MAX = __NLMSGERR_ATTR_MAX - 1 +}; + +#define NETLINK_ADD_MEMBERSHIP 1 +#define NETLINK_DROP_MEMBERSHIP 2 +#define NETLINK_PKTINFO 3 +#define NETLINK_BROADCAST_ERROR 4 +#define NETLINK_NO_ENOBUFS 5 +#ifndef __KERNEL__ +#define NETLINK_RX_RING 6 +#define NETLINK_TX_RING 7 +#endif +#define NETLINK_LISTEN_ALL_NSID 8 +#define NETLINK_LIST_MEMBERSHIPS 9 +#define NETLINK_CAP_ACK 10 +#define NETLINK_EXT_ACK 11 +#define NETLINK_GET_STRICT_CHK 12 + +struct nl_pktinfo { + __u32 group; +}; + +struct nl_mmap_req { + unsigned int nm_block_size; + unsigned int nm_block_nr; + unsigned int nm_frame_size; + unsigned int nm_frame_nr; +}; + +struct nl_mmap_hdr { + unsigned int nm_status; + unsigned int nm_len; + __u32 nm_group; + /* credentials */ + __u32 nm_pid; + __u32 nm_uid; + __u32 nm_gid; +}; + +#ifndef __KERNEL__ +enum nl_mmap_status { + NL_MMAP_STATUS_UNUSED, + NL_MMAP_STATUS_RESERVED, + NL_MMAP_STATUS_VALID, + NL_MMAP_STATUS_COPY, + NL_MMAP_STATUS_SKIP, +}; + +#define NL_MMAP_MSG_ALIGNMENT NLMSG_ALIGNTO +#define NL_MMAP_MSG_ALIGN(sz) __ALIGN_KERNEL(sz, NL_MMAP_MSG_ALIGNMENT) +#define NL_MMAP_HDRLEN NL_MMAP_MSG_ALIGN(sizeof(struct nl_mmap_hdr)) +#endif + +#define NET_MAJOR 36 /* Major 36 is reserved for networking */ + +enum { + NETLINK_UNCONNECTED = 0, + NETLINK_CONNECTED, +}; + +/* + * <------- NLA_HDRLEN ------> <-- NLA_ALIGN(payload)--> + * +---------------------+- - -+- - - - - - - - - -+- - -+ + * | Header | Pad | Payload | Pad | + * | (struct nlattr) | ing | | ing | + * +---------------------+- - -+- - - - - - - - - -+- - -+ + * <-------------- nlattr->nla_len --------------> + */ + +struct nlattr { + __u16 nla_len; + __u16 nla_type; +}; + +/* + * nla_type (16 bits) + * +---+---+-------------------------------+ + * | N | O | Attribute Type | + * +---+---+-------------------------------+ + * N := Carries nested attributes + * O := Payload stored in network byte order + * + * Note: The N and O flag are mutually exclusive. + */ +#define NLA_F_NESTED (1 << 15) +#define NLA_F_NET_BYTEORDER (1 << 14) +#define NLA_TYPE_MASK ~(NLA_F_NESTED | NLA_F_NET_BYTEORDER) + +#define NLA_ALIGNTO 4 +#define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1)) +#define NLA_HDRLEN ((int) NLA_ALIGN(sizeof(struct nlattr))) + +/* Generic 32 bitflags attribute content sent to the kernel. + * + * The value is a bitmap that defines the values being set + * The selector is a bitmask that defines which value is legit + * + * Examples: + * value = 0x0, and selector = 0x1 + * implies we are selecting bit 1 and we want to set its value to 0. + * + * value = 0x2, and selector = 0x2 + * implies we are selecting bit 2 and we want to set its value to 1. + * + */ +struct nla_bitfield32 { + __u32 value; + __u32 selector; +}; + +#endif /* _UAPI__LINUX_NETLINK_H */ diff --git a/src/contrib/libngtcp2/LICENSE b/src/contrib/libngtcp2/LICENSE new file mode 100644 index 0000000..0161703 --- /dev/null +++ b/src/contrib/libngtcp2/LICENSE @@ -0,0 +1 @@ +../licenses/MIT
\ No newline at end of file diff --git a/src/contrib/libngtcp2/ngtcp2/crypto/gnutls.c b/src/contrib/libngtcp2/ngtcp2/crypto/gnutls.c new file mode 100644 index 0000000..73ea0c1 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/crypto/gnutls.c @@ -0,0 +1,644 @@ +/* + * ngtcp2 + * + * Copyright (c) 2020 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <assert.h> + +#include <ngtcp2/ngtcp2_crypto.h> +#include <ngtcp2/ngtcp2_crypto_gnutls.h> + +#include <gnutls/gnutls.h> +#include <gnutls/crypto.h> +#include <string.h> + +#include "shared.h" + +ngtcp2_crypto_aead *ngtcp2_crypto_aead_aes_128_gcm(ngtcp2_crypto_aead *aead) { + return ngtcp2_crypto_aead_init(aead, (void *)GNUTLS_CIPHER_AES_128_GCM); +} + +ngtcp2_crypto_md *ngtcp2_crypto_md_sha256(ngtcp2_crypto_md *md) { + md->native_handle = (void *)GNUTLS_DIG_SHA256; + return md; +} + +ngtcp2_crypto_ctx *ngtcp2_crypto_ctx_initial(ngtcp2_crypto_ctx *ctx) { + ngtcp2_crypto_aead_init(&ctx->aead, (void *)GNUTLS_CIPHER_AES_128_GCM); + ctx->md.native_handle = (void *)GNUTLS_DIG_SHA256; + ctx->hp.native_handle = (void *)GNUTLS_CIPHER_AES_128_CBC; + ctx->max_encryption = 0; + ctx->max_decryption_failure = 0; + return ctx; +} + +ngtcp2_crypto_aead *ngtcp2_crypto_aead_init(ngtcp2_crypto_aead *aead, + void *aead_native_handle) { + aead->native_handle = aead_native_handle; + aead->max_overhead = gnutls_cipher_get_tag_size( + (gnutls_cipher_algorithm_t)(intptr_t)aead_native_handle); + return aead; +} + +ngtcp2_crypto_aead *ngtcp2_crypto_aead_retry(ngtcp2_crypto_aead *aead) { + return ngtcp2_crypto_aead_init(aead, (void *)GNUTLS_CIPHER_AES_128_GCM); +} + +static gnutls_cipher_algorithm_t +crypto_get_hp(gnutls_cipher_algorithm_t cipher) { + switch (cipher) { + case GNUTLS_CIPHER_AES_128_GCM: + case GNUTLS_CIPHER_AES_128_CCM: + return GNUTLS_CIPHER_AES_128_CBC; + case GNUTLS_CIPHER_AES_256_GCM: + case GNUTLS_CIPHER_AES_256_CCM: + return GNUTLS_CIPHER_AES_256_CBC; + case GNUTLS_CIPHER_CHACHA20_POLY1305: + return GNUTLS_CIPHER_CHACHA20_32; + default: + return GNUTLS_CIPHER_UNKNOWN; + } +} + +static uint64_t +crypto_get_aead_max_encryption(gnutls_cipher_algorithm_t cipher) { + switch (cipher) { + case GNUTLS_CIPHER_AES_128_GCM: + case GNUTLS_CIPHER_AES_256_GCM: + return NGTCP2_CRYPTO_MAX_ENCRYPTION_AES_GCM; + case GNUTLS_CIPHER_CHACHA20_POLY1305: + return NGTCP2_CRYPTO_MAX_ENCRYPTION_CHACHA20_POLY1305; + case GNUTLS_CIPHER_AES_128_CCM: + case GNUTLS_CIPHER_AES_256_CCM: + return NGTCP2_CRYPTO_MAX_ENCRYPTION_AES_CCM; + default: + return 0; + } +} + +static uint64_t +crypto_get_aead_max_decryption_failure(gnutls_cipher_algorithm_t cipher) { + switch (cipher) { + case GNUTLS_CIPHER_AES_128_GCM: + case GNUTLS_CIPHER_AES_256_GCM: + return NGTCP2_CRYPTO_MAX_DECRYPTION_FAILURE_AES_GCM; + case GNUTLS_CIPHER_CHACHA20_POLY1305: + return NGTCP2_CRYPTO_MAX_DECRYPTION_FAILURE_CHACHA20_POLY1305; + case GNUTLS_CIPHER_AES_128_CCM: + case GNUTLS_CIPHER_AES_256_CCM: + return NGTCP2_CRYPTO_MAX_DECRYPTION_FAILURE_AES_CCM; + default: + return 0; + } +} + +ngtcp2_crypto_ctx *ngtcp2_crypto_ctx_tls(ngtcp2_crypto_ctx *ctx, + void *tls_native_handle) { + gnutls_session_t session = tls_native_handle; + gnutls_cipher_algorithm_t cipher; + gnutls_digest_algorithm_t hash; + gnutls_cipher_algorithm_t hp_cipher; + + cipher = gnutls_cipher_get(session); + if (cipher != GNUTLS_CIPHER_UNKNOWN && cipher != GNUTLS_CIPHER_NULL) { + ngtcp2_crypto_aead_init(&ctx->aead, (void *)cipher); + } + + hash = gnutls_prf_hash_get(session); + if (hash != GNUTLS_DIG_UNKNOWN && hash != GNUTLS_DIG_NULL) { + ctx->md.native_handle = (void *)hash; + } + + hp_cipher = crypto_get_hp(cipher); + if (hp_cipher != GNUTLS_CIPHER_UNKNOWN) { + ctx->hp.native_handle = (void *)hp_cipher; + } + + ctx->max_encryption = crypto_get_aead_max_encryption(cipher); + ctx->max_decryption_failure = crypto_get_aead_max_decryption_failure(cipher); + + return ctx; +} + +ngtcp2_crypto_ctx *ngtcp2_crypto_ctx_tls_early(ngtcp2_crypto_ctx *ctx, + void *tls_native_handle) { + gnutls_session_t session = tls_native_handle; + gnutls_cipher_algorithm_t cipher; + gnutls_digest_algorithm_t hash; + gnutls_cipher_algorithm_t hp_cipher; + + cipher = gnutls_early_cipher_get(session); + if (cipher != GNUTLS_CIPHER_UNKNOWN && cipher != GNUTLS_CIPHER_NULL) { + ngtcp2_crypto_aead_init(&ctx->aead, (void *)cipher); + } + + hash = gnutls_early_prf_hash_get(session); + if (hash != GNUTLS_DIG_UNKNOWN && hash != GNUTLS_DIG_NULL) { + ctx->md.native_handle = (void *)hash; + } + + hp_cipher = crypto_get_hp(cipher); + if (hp_cipher != GNUTLS_CIPHER_UNKNOWN) { + ctx->hp.native_handle = (void *)hp_cipher; + } + + ctx->max_encryption = crypto_get_aead_max_encryption(cipher); + ctx->max_decryption_failure = crypto_get_aead_max_decryption_failure(cipher); + + return ctx; +} + +size_t ngtcp2_crypto_md_hashlen(const ngtcp2_crypto_md *md) { + return gnutls_hash_get_len( + (gnutls_digest_algorithm_t)(intptr_t)md->native_handle); +} + +size_t ngtcp2_crypto_aead_keylen(const ngtcp2_crypto_aead *aead) { + return gnutls_cipher_get_key_size( + (gnutls_cipher_algorithm_t)(intptr_t)aead->native_handle); +} + +size_t ngtcp2_crypto_aead_noncelen(const ngtcp2_crypto_aead *aead) { + return gnutls_cipher_get_iv_size( + (gnutls_cipher_algorithm_t)(intptr_t)aead->native_handle); +} + +int ngtcp2_crypto_aead_ctx_encrypt_init(ngtcp2_crypto_aead_ctx *aead_ctx, + const ngtcp2_crypto_aead *aead, + const uint8_t *key, size_t noncelen) { + gnutls_cipher_algorithm_t cipher = + (gnutls_cipher_algorithm_t)(intptr_t)aead->native_handle; + gnutls_aead_cipher_hd_t hd; + gnutls_datum_t _key; + + (void)noncelen; + + _key.data = (void *)key; + _key.size = (unsigned int)ngtcp2_crypto_aead_keylen(aead); + + if (gnutls_aead_cipher_init(&hd, cipher, &_key) != 0) { + return -1; + } + + aead_ctx->native_handle = hd; + + return 0; +} + +int ngtcp2_crypto_aead_ctx_decrypt_init(ngtcp2_crypto_aead_ctx *aead_ctx, + const ngtcp2_crypto_aead *aead, + const uint8_t *key, size_t noncelen) { + gnutls_cipher_algorithm_t cipher = + (gnutls_cipher_algorithm_t)(intptr_t)aead->native_handle; + gnutls_aead_cipher_hd_t hd; + gnutls_datum_t _key; + + (void)noncelen; + + _key.data = (void *)key; + _key.size = (unsigned int)ngtcp2_crypto_aead_keylen(aead); + + if (gnutls_aead_cipher_init(&hd, cipher, &_key) != 0) { + return -1; + } + + aead_ctx->native_handle = hd; + + return 0; +} + +void ngtcp2_crypto_aead_ctx_free(ngtcp2_crypto_aead_ctx *aead_ctx) { + if (aead_ctx->native_handle) { + gnutls_aead_cipher_deinit(aead_ctx->native_handle); + } +} + +int ngtcp2_crypto_cipher_ctx_encrypt_init(ngtcp2_crypto_cipher_ctx *cipher_ctx, + const ngtcp2_crypto_cipher *cipher, + const uint8_t *key) { + gnutls_cipher_algorithm_t _cipher = + (gnutls_cipher_algorithm_t)(intptr_t)cipher->native_handle; + gnutls_cipher_hd_t hd; + gnutls_datum_t _key; + + _key.data = (void *)key; + _key.size = (unsigned int)gnutls_cipher_get_key_size(_cipher); + + if (gnutls_cipher_init(&hd, _cipher, &_key, NULL) != 0) { + return -1; + } + + cipher_ctx->native_handle = hd; + + return 0; +} + +void ngtcp2_crypto_cipher_ctx_free(ngtcp2_crypto_cipher_ctx *cipher_ctx) { + if (cipher_ctx->native_handle) { + gnutls_cipher_deinit(cipher_ctx->native_handle); + } +} + +int ngtcp2_crypto_hkdf_extract(uint8_t *dest, const ngtcp2_crypto_md *md, + const uint8_t *secret, size_t secretlen, + const uint8_t *salt, size_t saltlen) { + gnutls_mac_algorithm_t prf = + (gnutls_mac_algorithm_t)(intptr_t)md->native_handle; + gnutls_datum_t _secret = {(void *)secret, (unsigned int)secretlen}; + gnutls_datum_t _salt = {(void *)salt, (unsigned int)saltlen}; + + if (gnutls_hkdf_extract(prf, &_secret, &_salt, dest) != 0) { + return -1; + } + + return 0; +} + +int ngtcp2_crypto_hkdf_expand(uint8_t *dest, size_t destlen, + const ngtcp2_crypto_md *md, const uint8_t *secret, + size_t secretlen, const uint8_t *info, + size_t infolen) { + gnutls_mac_algorithm_t prf = + (gnutls_mac_algorithm_t)(intptr_t)md->native_handle; + gnutls_datum_t _secret = {(void *)secret, (unsigned int)secretlen}; + gnutls_datum_t _info = {(void *)info, (unsigned int)infolen}; + + if (gnutls_hkdf_expand(prf, &_secret, &_info, dest, destlen) != 0) { + return -1; + } + + return 0; +} + +int ngtcp2_crypto_hkdf(uint8_t *dest, size_t destlen, + const ngtcp2_crypto_md *md, const uint8_t *secret, + size_t secretlen, const uint8_t *salt, size_t saltlen, + const uint8_t *info, size_t infolen) { + gnutls_mac_algorithm_t prf = + (gnutls_mac_algorithm_t)(intptr_t)md->native_handle; + size_t keylen = ngtcp2_crypto_md_hashlen(md); + uint8_t key[64]; + gnutls_datum_t _secret = {(void *)secret, (unsigned int)secretlen}; + gnutls_datum_t _key = {(void *)key, (unsigned int)keylen}; + gnutls_datum_t _salt = {(void *)salt, (unsigned int)saltlen}; + gnutls_datum_t _info = {(void *)info, (unsigned int)infolen}; + + assert(keylen <= sizeof(key)); + + if (gnutls_hkdf_extract(prf, &_secret, &_salt, key) != 0) { + return -1; + } + + if (gnutls_hkdf_expand(prf, &_key, &_info, dest, destlen) != 0) { + return -1; + } + + return 0; +} + +int ngtcp2_crypto_encrypt(uint8_t *dest, const ngtcp2_crypto_aead *aead, + const ngtcp2_crypto_aead_ctx *aead_ctx, + const uint8_t *plaintext, size_t plaintextlen, + const uint8_t *nonce, size_t noncelen, + const uint8_t *aad, size_t aadlen) { + gnutls_cipher_algorithm_t cipher = + (gnutls_cipher_algorithm_t)(intptr_t)aead->native_handle; + gnutls_aead_cipher_hd_t hd = aead_ctx->native_handle; + size_t taglen = gnutls_cipher_get_tag_size(cipher); + size_t ciphertextlen = plaintextlen + taglen; + + if (gnutls_aead_cipher_encrypt(hd, nonce, noncelen, aad, aadlen, taglen, + plaintext, plaintextlen, dest, + &ciphertextlen) != 0) { + return -1; + } + + return 0; +} + +int ngtcp2_crypto_decrypt(uint8_t *dest, const ngtcp2_crypto_aead *aead, + const ngtcp2_crypto_aead_ctx *aead_ctx, + const uint8_t *ciphertext, size_t ciphertextlen, + const uint8_t *nonce, size_t noncelen, + const uint8_t *aad, size_t aadlen) { + gnutls_cipher_algorithm_t cipher = + (gnutls_cipher_algorithm_t)(intptr_t)aead->native_handle; + gnutls_aead_cipher_hd_t hd = aead_ctx->native_handle; + size_t taglen = gnutls_cipher_get_tag_size(cipher); + size_t plaintextlen; + + if (taglen > ciphertextlen) { + return -1; + } + + plaintextlen = ciphertextlen - taglen; + + if (gnutls_aead_cipher_decrypt(hd, nonce, noncelen, aad, aadlen, taglen, + ciphertext, ciphertextlen, dest, + &plaintextlen) != 0) { + return -1; + } + + return 0; +} + +int ngtcp2_crypto_hp_mask(uint8_t *dest, const ngtcp2_crypto_cipher *hp, + const ngtcp2_crypto_cipher_ctx *hp_ctx, + const uint8_t *sample) { + gnutls_cipher_algorithm_t cipher = + (gnutls_cipher_algorithm_t)(intptr_t)hp->native_handle; + gnutls_cipher_hd_t hd = hp_ctx->native_handle; + + switch (cipher) { + case GNUTLS_CIPHER_AES_128_CBC: + case GNUTLS_CIPHER_AES_256_CBC: { + uint8_t iv[16]; + uint8_t buf[16]; + + /* Emulate one block AES-ECB by invalidating the effect of IV */ + memset(iv, 0, sizeof(iv)); + + gnutls_cipher_set_iv(hd, iv, sizeof(iv)); + + if (gnutls_cipher_encrypt2(hd, sample, 16, buf, sizeof(buf)) != 0) { + return -1; + } + + memcpy(dest, buf, 5); + } break; + + case GNUTLS_CIPHER_CHACHA20_32: { + static const uint8_t PLAINTEXT[] = "\x00\x00\x00\x00\x00"; + uint8_t buf[5 + 16]; + size_t buflen = sizeof(buf); + + gnutls_cipher_set_iv(hd, (void *)sample, 16); + + if (gnutls_cipher_encrypt2(hd, PLAINTEXT, sizeof(PLAINTEXT) - 1, buf, + buflen) != 0) { + return -1; + } + + memcpy(dest, buf, 5); + } break; + default: + assert(0); + } + + return 0; +} + +ngtcp2_crypto_level ngtcp2_crypto_gnutls_from_gnutls_record_encryption_level( + gnutls_record_encryption_level_t gtls_level) { + switch (gtls_level) { + case GNUTLS_ENCRYPTION_LEVEL_INITIAL: + return NGTCP2_CRYPTO_LEVEL_INITIAL; + case GNUTLS_ENCRYPTION_LEVEL_HANDSHAKE: + return NGTCP2_CRYPTO_LEVEL_HANDSHAKE; + case GNUTLS_ENCRYPTION_LEVEL_APPLICATION: + return NGTCP2_CRYPTO_LEVEL_APPLICATION; + case GNUTLS_ENCRYPTION_LEVEL_EARLY: + return NGTCP2_CRYPTO_LEVEL_EARLY; + default: + assert(0); + abort(); + } +} + +gnutls_record_encryption_level_t +ngtcp2_crypto_gnutls_from_ngtcp2_level(ngtcp2_crypto_level crypto_level) { + switch (crypto_level) { + case NGTCP2_CRYPTO_LEVEL_INITIAL: + return GNUTLS_ENCRYPTION_LEVEL_INITIAL; + case NGTCP2_CRYPTO_LEVEL_HANDSHAKE: + return GNUTLS_ENCRYPTION_LEVEL_HANDSHAKE; + case NGTCP2_CRYPTO_LEVEL_APPLICATION: + return GNUTLS_ENCRYPTION_LEVEL_APPLICATION; + case NGTCP2_CRYPTO_LEVEL_EARLY: + return GNUTLS_ENCRYPTION_LEVEL_EARLY; + default: + assert(0); + abort(); + } +} + +int ngtcp2_crypto_read_write_crypto_data(ngtcp2_conn *conn, + ngtcp2_crypto_level crypto_level, + const uint8_t *data, size_t datalen) { + gnutls_session_t session = ngtcp2_conn_get_tls_native_handle(conn); + int rv; + + if (datalen > 0) { + rv = gnutls_handshake_write( + session, ngtcp2_crypto_gnutls_from_ngtcp2_level(crypto_level), data, + datalen); + if (rv != 0) { + if (!gnutls_error_is_fatal(rv)) { + return 0; + } + gnutls_alert_send_appropriate(session, rv); + return -1; + } + } + + if (!ngtcp2_conn_get_handshake_completed(conn)) { + rv = gnutls_handshake(session); + if (rv < 0) { + if (!gnutls_error_is_fatal(rv)) { + return 0; + } + gnutls_alert_send_appropriate(session, rv); + return -1; + } + + ngtcp2_conn_handshake_completed(conn); + } + + return 0; +} + +int ngtcp2_crypto_set_remote_transport_params(ngtcp2_conn *conn, void *tls) { + (void)conn; + (void)tls; + /* Nothing to do; GnuTLS applications are supposed to register the + quic_transport_parameters extension with + gnutls_session_ext_register. */ + return 0; +} + +int ngtcp2_crypto_set_local_transport_params(void *tls, const uint8_t *buf, + size_t len) { + (void)tls; + (void)buf; + (void)len; + /* Nothing to do; GnuTLS applications are supposed to register the + quic_transport_parameters extension with + gnutls_session_ext_register. */ + return 0; +} + +int ngtcp2_crypto_get_path_challenge_data_cb(ngtcp2_conn *conn, uint8_t *data, + void *user_data) { + (void)conn; + (void)user_data; + + if (gnutls_rnd(GNUTLS_RND_RANDOM, data, NGTCP2_PATH_CHALLENGE_DATALEN) != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +int ngtcp2_crypto_random(uint8_t *data, size_t datalen) { + if (gnutls_rnd(GNUTLS_RND_RANDOM, data, datalen) != 0) { + return -1; + } + + return 0; +} + +static int secret_func(gnutls_session_t session, + gnutls_record_encryption_level_t gtls_level, + const void *rx_secret, const void *tx_secret, + size_t secretlen) { + ngtcp2_crypto_conn_ref *conn_ref = gnutls_session_get_ptr(session); + ngtcp2_conn *conn = conn_ref->get_conn(conn_ref); + ngtcp2_crypto_level level = + ngtcp2_crypto_gnutls_from_gnutls_record_encryption_level(gtls_level); + + if (rx_secret && + ngtcp2_crypto_derive_and_install_rx_key(conn, NULL, NULL, NULL, level, + rx_secret, secretlen) != 0) { + return -1; + } + + if (tx_secret && + ngtcp2_crypto_derive_and_install_tx_key(conn, NULL, NULL, NULL, level, + tx_secret, secretlen) != 0) { + return -1; + } + + return 0; +} + +static int read_func(gnutls_session_t session, + gnutls_record_encryption_level_t gtls_level, + gnutls_handshake_description_t htype, const void *data, + size_t datalen) { + ngtcp2_crypto_conn_ref *conn_ref = gnutls_session_get_ptr(session); + ngtcp2_conn *conn = conn_ref->get_conn(conn_ref); + ngtcp2_crypto_level level = + ngtcp2_crypto_gnutls_from_gnutls_record_encryption_level(gtls_level); + int rv; + + if (htype == GNUTLS_HANDSHAKE_CHANGE_CIPHER_SPEC) { + return 0; + } + + rv = ngtcp2_conn_submit_crypto_data(conn, level, data, datalen); + if (rv != 0) { + ngtcp2_conn_set_tls_error(conn, rv); + return -1; + } + + return 0; +} + +static int alert_read_func(gnutls_session_t session, + gnutls_record_encryption_level_t gtls_level, + gnutls_alert_level_t alert_level, + gnutls_alert_description_t alert_desc) { + ngtcp2_crypto_conn_ref *conn_ref = gnutls_session_get_ptr(session); + ngtcp2_conn *conn = conn_ref->get_conn(conn_ref); + (void)gtls_level; + (void)alert_level; + + ngtcp2_conn_set_tls_alert(conn, (uint8_t)alert_desc); + + return 0; +} + +static int tp_recv_func(gnutls_session_t session, const uint8_t *data, + size_t datalen) { + ngtcp2_crypto_conn_ref *conn_ref = gnutls_session_get_ptr(session); + ngtcp2_conn *conn = conn_ref->get_conn(conn_ref); + int rv; + + rv = ngtcp2_conn_decode_remote_transport_params(conn, data, datalen); + if (rv != 0) { + ngtcp2_conn_set_tls_error(conn, rv); + return -1; + } + + return 0; +} + +static int tp_send_func(gnutls_session_t session, gnutls_buffer_t extdata) { + ngtcp2_crypto_conn_ref *conn_ref = gnutls_session_get_ptr(session); + ngtcp2_conn *conn = conn_ref->get_conn(conn_ref); + uint8_t buf[256]; + ngtcp2_ssize nwrite; + int rv; + + nwrite = ngtcp2_conn_encode_local_transport_params(conn, buf, sizeof(buf)); + if (nwrite < 0) { + return -1; + } + + rv = gnutls_buffer_append_data(extdata, buf, (size_t)nwrite); + if (rv != 0) { + return -1; + } + + return 0; +} + +static int crypto_gnutls_configure_session(gnutls_session_t session) { + int rv; + + gnutls_handshake_set_secret_function(session, secret_func); + gnutls_handshake_set_read_function(session, read_func); + gnutls_alert_set_read_function(session, alert_read_func); + + rv = gnutls_session_ext_register( + session, "QUIC Transport Parameters", + NGTCP2_TLSEXT_QUIC_TRANSPORT_PARAMETERS_V1, GNUTLS_EXT_TLS, tp_recv_func, + tp_send_func, NULL, NULL, NULL, + GNUTLS_EXT_FLAG_TLS | GNUTLS_EXT_FLAG_CLIENT_HELLO | GNUTLS_EXT_FLAG_EE); + if (rv != 0) { + return -1; + } + + return 0; +} + +int ngtcp2_crypto_gnutls_configure_server_session(gnutls_session_t session) { + return crypto_gnutls_configure_session(session); +} + +int ngtcp2_crypto_gnutls_configure_client_session(gnutls_session_t session) { + return crypto_gnutls_configure_session(session); +} diff --git a/src/contrib/libngtcp2/ngtcp2/crypto/shared.c b/src/contrib/libngtcp2/ngtcp2/crypto/shared.c new file mode 100644 index 0000000..f38e0b1 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/crypto/shared.c @@ -0,0 +1,1418 @@ +/* + * ngtcp2 + * + * Copyright (c) 2019 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "shared.h" + +#ifdef WIN32 +# include <winsock2.h> +# include <ws2tcpip.h> +#else +# include <netinet/in.h> +#endif + +#include <string.h> +#include <assert.h> + +#include "ngtcp2_macro.h" +#include "ngtcp2_net.h" + +ngtcp2_crypto_md *ngtcp2_crypto_md_init(ngtcp2_crypto_md *md, + void *md_native_handle) { + md->native_handle = md_native_handle; + return md; +} + +int ngtcp2_crypto_hkdf_expand_label(uint8_t *dest, size_t destlen, + const ngtcp2_crypto_md *md, + const uint8_t *secret, size_t secretlen, + const uint8_t *label, size_t labellen) { + static const uint8_t LABEL[] = "tls13 "; + uint8_t info[256]; + uint8_t *p = info; + + *p++ = (uint8_t)(destlen / 256); + *p++ = (uint8_t)(destlen % 256); + *p++ = (uint8_t)(sizeof(LABEL) - 1 + labellen); + memcpy(p, LABEL, sizeof(LABEL) - 1); + p += sizeof(LABEL) - 1; + memcpy(p, label, labellen); + p += labellen; + *p++ = 0; + + return ngtcp2_crypto_hkdf_expand(dest, destlen, md, secret, secretlen, info, + (size_t)(p - info)); +} + +#define NGTCP2_CRYPTO_INITIAL_SECRETLEN 32 + +int ngtcp2_crypto_derive_initial_secrets(uint32_t version, uint8_t *rx_secret, + uint8_t *tx_secret, + uint8_t *initial_secret, + const ngtcp2_cid *client_dcid, + ngtcp2_crypto_side side) { + static const uint8_t CLABEL[] = "client in"; + static const uint8_t SLABEL[] = "server in"; + uint8_t initial_secret_buf[NGTCP2_CRYPTO_INITIAL_SECRETLEN]; + uint8_t *client_secret; + uint8_t *server_secret; + ngtcp2_crypto_ctx ctx; + const uint8_t *salt; + size_t saltlen; + + if (!initial_secret) { + initial_secret = initial_secret_buf; + } + + ngtcp2_crypto_ctx_initial(&ctx); + + switch (version) { + case NGTCP2_PROTO_VER_V1: + salt = (const uint8_t *)NGTCP2_INITIAL_SALT_V1; + saltlen = sizeof(NGTCP2_INITIAL_SALT_V1) - 1; + break; + case NGTCP2_PROTO_VER_V2: + salt = (const uint8_t *)NGTCP2_INITIAL_SALT_V2; + saltlen = sizeof(NGTCP2_INITIAL_SALT_V2) - 1; + break; + default: + salt = (const uint8_t *)NGTCP2_INITIAL_SALT_DRAFT; + saltlen = sizeof(NGTCP2_INITIAL_SALT_DRAFT) - 1; + } + + if (ngtcp2_crypto_hkdf_extract(initial_secret, &ctx.md, client_dcid->data, + client_dcid->datalen, salt, saltlen) != 0) { + return -1; + } + + if (side == NGTCP2_CRYPTO_SIDE_SERVER) { + client_secret = rx_secret; + server_secret = tx_secret; + } else { + client_secret = tx_secret; + server_secret = rx_secret; + } + + if (ngtcp2_crypto_hkdf_expand_label( + client_secret, NGTCP2_CRYPTO_INITIAL_SECRETLEN, &ctx.md, + initial_secret, NGTCP2_CRYPTO_INITIAL_SECRETLEN, CLABEL, + sizeof(CLABEL) - 1) != 0 || + ngtcp2_crypto_hkdf_expand_label( + server_secret, NGTCP2_CRYPTO_INITIAL_SECRETLEN, &ctx.md, + initial_secret, NGTCP2_CRYPTO_INITIAL_SECRETLEN, SLABEL, + sizeof(SLABEL) - 1) != 0) { + return -1; + } + + return 0; +} + +size_t ngtcp2_crypto_packet_protection_ivlen(const ngtcp2_crypto_aead *aead) { + size_t noncelen = ngtcp2_crypto_aead_noncelen(aead); + return ngtcp2_max(8, noncelen); +} + +int ngtcp2_crypto_derive_packet_protection_key( + uint8_t *key, uint8_t *iv, uint8_t *hp_key, uint32_t version, + const ngtcp2_crypto_aead *aead, const ngtcp2_crypto_md *md, + const uint8_t *secret, size_t secretlen) { + static const uint8_t KEY_LABEL_V1[] = "quic key"; + static const uint8_t IV_LABEL_V1[] = "quic iv"; + static const uint8_t HP_KEY_LABEL_V1[] = "quic hp"; + static const uint8_t KEY_LABEL_V2[] = "quicv2 key"; + static const uint8_t IV_LABEL_V2[] = "quicv2 iv"; + static const uint8_t HP_KEY_LABEL_V2[] = "quicv2 hp"; + size_t keylen = ngtcp2_crypto_aead_keylen(aead); + size_t ivlen = ngtcp2_crypto_packet_protection_ivlen(aead); + const uint8_t *key_label; + size_t key_labellen; + const uint8_t *iv_label; + size_t iv_labellen; + const uint8_t *hp_key_label; + size_t hp_key_labellen; + + switch (version) { + case NGTCP2_PROTO_VER_V2: + key_label = KEY_LABEL_V2; + key_labellen = sizeof(KEY_LABEL_V2) - 1; + iv_label = IV_LABEL_V2; + iv_labellen = sizeof(IV_LABEL_V2) - 1; + hp_key_label = HP_KEY_LABEL_V2; + hp_key_labellen = sizeof(HP_KEY_LABEL_V2) - 1; + break; + default: + key_label = KEY_LABEL_V1; + key_labellen = sizeof(KEY_LABEL_V1) - 1; + iv_label = IV_LABEL_V1; + iv_labellen = sizeof(IV_LABEL_V1) - 1; + hp_key_label = HP_KEY_LABEL_V1; + hp_key_labellen = sizeof(HP_KEY_LABEL_V1) - 1; + } + + if (ngtcp2_crypto_hkdf_expand_label(key, keylen, md, secret, secretlen, + key_label, key_labellen) != 0) { + return -1; + } + + if (ngtcp2_crypto_hkdf_expand_label(iv, ivlen, md, secret, secretlen, + iv_label, iv_labellen) != 0) { + return -1; + } + + if (hp_key != NULL && + ngtcp2_crypto_hkdf_expand_label(hp_key, keylen, md, secret, secretlen, + hp_key_label, hp_key_labellen) != 0) { + return -1; + } + + return 0; +} + +int ngtcp2_crypto_update_traffic_secret(uint8_t *dest, + const ngtcp2_crypto_md *md, + const uint8_t *secret, + size_t secretlen) { + static const uint8_t LABEL[] = "quic ku"; + + if (ngtcp2_crypto_hkdf_expand_label(dest, secretlen, md, secret, secretlen, + LABEL, sizeof(LABEL) - 1) != 0) { + return -1; + } + + return 0; +} + +int ngtcp2_crypto_derive_and_install_rx_key(ngtcp2_conn *conn, uint8_t *key, + uint8_t *iv, uint8_t *hp_key, + ngtcp2_crypto_level level, + const uint8_t *secret, + size_t secretlen) { + const ngtcp2_crypto_ctx *ctx; + const ngtcp2_crypto_aead *aead; + const ngtcp2_crypto_md *md; + const ngtcp2_crypto_cipher *hp; + ngtcp2_crypto_aead_ctx aead_ctx = {0}; + ngtcp2_crypto_cipher_ctx hp_ctx = {0}; + void *tls = ngtcp2_conn_get_tls_native_handle(conn); + uint8_t keybuf[64], ivbuf[64], hp_keybuf[64]; + size_t ivlen; + int rv; + ngtcp2_crypto_ctx cctx; + uint32_t version; + + if (level == NGTCP2_CRYPTO_LEVEL_EARLY && !ngtcp2_conn_is_server(conn)) { + return 0; + } + + if (!key) { + key = keybuf; + } + if (!iv) { + iv = ivbuf; + } + if (!hp_key) { + hp_key = hp_keybuf; + } + + switch (level) { + case NGTCP2_CRYPTO_LEVEL_EARLY: + ngtcp2_crypto_ctx_tls_early(&cctx, tls); + ngtcp2_conn_set_early_crypto_ctx(conn, &cctx); + ctx = ngtcp2_conn_get_early_crypto_ctx(conn); + version = ngtcp2_conn_get_client_chosen_version(conn); + break; + case NGTCP2_CRYPTO_LEVEL_HANDSHAKE: + if (ngtcp2_conn_is_server(conn) && + !ngtcp2_conn_get_negotiated_version(conn)) { + rv = ngtcp2_crypto_set_remote_transport_params(conn, tls); + if (rv != 0) { + return -1; + } + } + /* fall through */ + default: + ctx = ngtcp2_conn_get_crypto_ctx(conn); + version = ngtcp2_conn_get_negotiated_version(conn); + + if (!ctx->aead.native_handle) { + ngtcp2_crypto_ctx_tls(&cctx, tls); + ngtcp2_conn_set_crypto_ctx(conn, &cctx); + ctx = ngtcp2_conn_get_crypto_ctx(conn); + } + } + + aead = &ctx->aead; + md = &ctx->md; + hp = &ctx->hp; + ivlen = ngtcp2_crypto_packet_protection_ivlen(aead); + + if (ngtcp2_crypto_derive_packet_protection_key(key, iv, hp_key, version, aead, + md, secret, secretlen) != 0) { + return -1; + } + + if (ngtcp2_crypto_aead_ctx_decrypt_init(&aead_ctx, aead, key, ivlen) != 0) { + goto fail; + } + + if (ngtcp2_crypto_cipher_ctx_encrypt_init(&hp_ctx, hp, hp_key) != 0) { + goto fail; + } + + switch (level) { + case NGTCP2_CRYPTO_LEVEL_EARLY: + rv = ngtcp2_conn_install_early_key(conn, &aead_ctx, iv, ivlen, &hp_ctx); + if (rv != 0) { + goto fail; + } + break; + case NGTCP2_CRYPTO_LEVEL_HANDSHAKE: + rv = ngtcp2_conn_install_rx_handshake_key(conn, &aead_ctx, iv, ivlen, + &hp_ctx); + if (rv != 0) { + goto fail; + } + break; + case NGTCP2_CRYPTO_LEVEL_APPLICATION: + if (!ngtcp2_conn_is_server(conn)) { + rv = ngtcp2_crypto_set_remote_transport_params(conn, tls); + if (rv != 0) { + goto fail; + } + } + + rv = ngtcp2_conn_install_rx_key(conn, secret, secretlen, &aead_ctx, iv, + ivlen, &hp_ctx); + if (rv != 0) { + goto fail; + } + + break; + default: + goto fail; + } + + return 0; + +fail: + ngtcp2_crypto_cipher_ctx_free(&hp_ctx); + ngtcp2_crypto_aead_ctx_free(&aead_ctx); + + return -1; +} + +/* + * crypto_set_local_transport_params gets local QUIC transport + * parameters from |conn| and sets it to |tls|. + * + * This function returns 0 if it succeeds, or -1. + */ +static int crypto_set_local_transport_params(ngtcp2_conn *conn, void *tls) { + ngtcp2_ssize nwrite; + uint8_t buf[256]; + + nwrite = ngtcp2_conn_encode_local_transport_params(conn, buf, sizeof(buf)); + if (nwrite < 0) { + return -1; + } + + if (ngtcp2_crypto_set_local_transport_params(tls, buf, (size_t)nwrite) != 0) { + return -1; + } + + return 0; +} + +int ngtcp2_crypto_derive_and_install_tx_key(ngtcp2_conn *conn, uint8_t *key, + uint8_t *iv, uint8_t *hp_key, + ngtcp2_crypto_level level, + const uint8_t *secret, + size_t secretlen) { + const ngtcp2_crypto_ctx *ctx; + const ngtcp2_crypto_aead *aead; + const ngtcp2_crypto_md *md; + const ngtcp2_crypto_cipher *hp; + ngtcp2_crypto_aead_ctx aead_ctx = {0}; + ngtcp2_crypto_cipher_ctx hp_ctx = {0}; + void *tls = ngtcp2_conn_get_tls_native_handle(conn); + uint8_t keybuf[64], ivbuf[64], hp_keybuf[64]; + size_t ivlen; + int rv; + ngtcp2_crypto_ctx cctx; + uint32_t version; + + if (level == NGTCP2_CRYPTO_LEVEL_EARLY && ngtcp2_conn_is_server(conn)) { + return 0; + } + + if (!key) { + key = keybuf; + } + if (!iv) { + iv = ivbuf; + } + if (!hp_key) { + hp_key = hp_keybuf; + } + + switch (level) { + case NGTCP2_CRYPTO_LEVEL_EARLY: + ngtcp2_crypto_ctx_tls_early(&cctx, tls); + ngtcp2_conn_set_early_crypto_ctx(conn, &cctx); + ctx = ngtcp2_conn_get_early_crypto_ctx(conn); + version = ngtcp2_conn_get_client_chosen_version(conn); + break; + case NGTCP2_CRYPTO_LEVEL_HANDSHAKE: + if (ngtcp2_conn_is_server(conn) && + !ngtcp2_conn_get_negotiated_version(conn)) { + rv = ngtcp2_crypto_set_remote_transport_params(conn, tls); + if (rv != 0) { + return -1; + } + } + /* fall through */ + default: + ctx = ngtcp2_conn_get_crypto_ctx(conn); + version = ngtcp2_conn_get_negotiated_version(conn); + + if (!ctx->aead.native_handle) { + ngtcp2_crypto_ctx_tls(&cctx, tls); + ngtcp2_conn_set_crypto_ctx(conn, &cctx); + ctx = ngtcp2_conn_get_crypto_ctx(conn); + } + } + + aead = &ctx->aead; + md = &ctx->md; + hp = &ctx->hp; + ivlen = ngtcp2_crypto_packet_protection_ivlen(aead); + + if (ngtcp2_crypto_derive_packet_protection_key(key, iv, hp_key, version, aead, + md, secret, secretlen) != 0) { + return -1; + } + + if (ngtcp2_crypto_aead_ctx_encrypt_init(&aead_ctx, aead, key, ivlen) != 0) { + goto fail; + } + + if (ngtcp2_crypto_cipher_ctx_encrypt_init(&hp_ctx, hp, hp_key) != 0) { + goto fail; + } + + switch (level) { + case NGTCP2_CRYPTO_LEVEL_EARLY: + rv = ngtcp2_conn_install_early_key(conn, &aead_ctx, iv, ivlen, &hp_ctx); + if (rv != 0) { + goto fail; + } + break; + case NGTCP2_CRYPTO_LEVEL_HANDSHAKE: + rv = ngtcp2_conn_install_tx_handshake_key(conn, &aead_ctx, iv, ivlen, + &hp_ctx); + if (rv != 0) { + goto fail; + } + + if (ngtcp2_conn_is_server(conn) && + crypto_set_local_transport_params(conn, tls) != 0) { + goto fail; + } + + break; + case NGTCP2_CRYPTO_LEVEL_APPLICATION: + rv = ngtcp2_conn_install_tx_key(conn, secret, secretlen, &aead_ctx, iv, + ivlen, &hp_ctx); + if (rv != 0) { + goto fail; + } + + break; + default: + goto fail; + } + + return 0; + +fail: + ngtcp2_crypto_cipher_ctx_free(&hp_ctx); + ngtcp2_crypto_aead_ctx_free(&aead_ctx); + + return -1; +} + +int ngtcp2_crypto_derive_and_install_initial_key( + ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret, + uint8_t *initial_secret, uint8_t *rx_key, uint8_t *rx_iv, + uint8_t *rx_hp_key, uint8_t *tx_key, uint8_t *tx_iv, uint8_t *tx_hp_key, + uint32_t version, const ngtcp2_cid *client_dcid) { + uint8_t rx_secretbuf[NGTCP2_CRYPTO_INITIAL_SECRETLEN]; + uint8_t tx_secretbuf[NGTCP2_CRYPTO_INITIAL_SECRETLEN]; + uint8_t initial_secretbuf[NGTCP2_CRYPTO_INITIAL_SECRETLEN]; + uint8_t rx_keybuf[NGTCP2_CRYPTO_INITIAL_KEYLEN]; + uint8_t rx_ivbuf[NGTCP2_CRYPTO_INITIAL_IVLEN]; + uint8_t rx_hp_keybuf[NGTCP2_CRYPTO_INITIAL_KEYLEN]; + uint8_t tx_keybuf[NGTCP2_CRYPTO_INITIAL_KEYLEN]; + uint8_t tx_ivbuf[NGTCP2_CRYPTO_INITIAL_IVLEN]; + uint8_t tx_hp_keybuf[NGTCP2_CRYPTO_INITIAL_KEYLEN]; + ngtcp2_crypto_ctx ctx; + ngtcp2_crypto_aead retry_aead; + ngtcp2_crypto_aead_ctx rx_aead_ctx = {0}; + ngtcp2_crypto_cipher_ctx rx_hp_ctx = {0}; + ngtcp2_crypto_aead_ctx tx_aead_ctx = {0}; + ngtcp2_crypto_cipher_ctx tx_hp_ctx = {0}; + ngtcp2_crypto_aead_ctx retry_aead_ctx = {0}; + int rv; + int server = ngtcp2_conn_is_server(conn); + const uint8_t *retry_key; + size_t retry_noncelen; + + ngtcp2_crypto_ctx_initial(&ctx); + + if (!rx_secret) { + rx_secret = rx_secretbuf; + } + if (!tx_secret) { + tx_secret = tx_secretbuf; + } + if (!initial_secret) { + initial_secret = initial_secretbuf; + } + + if (!rx_key) { + rx_key = rx_keybuf; + } + if (!rx_iv) { + rx_iv = rx_ivbuf; + } + if (!rx_hp_key) { + rx_hp_key = rx_hp_keybuf; + } + if (!tx_key) { + tx_key = tx_keybuf; + } + if (!tx_iv) { + tx_iv = tx_ivbuf; + } + if (!tx_hp_key) { + tx_hp_key = tx_hp_keybuf; + } + + ngtcp2_conn_set_initial_crypto_ctx(conn, &ctx); + + if (ngtcp2_crypto_derive_initial_secrets( + version, rx_secret, tx_secret, initial_secret, client_dcid, + server ? NGTCP2_CRYPTO_SIDE_SERVER : NGTCP2_CRYPTO_SIDE_CLIENT) != + 0) { + return -1; + } + + if (ngtcp2_crypto_derive_packet_protection_key( + rx_key, rx_iv, rx_hp_key, version, &ctx.aead, &ctx.md, rx_secret, + NGTCP2_CRYPTO_INITIAL_SECRETLEN) != 0) { + return -1; + } + + if (ngtcp2_crypto_derive_packet_protection_key( + tx_key, tx_iv, tx_hp_key, version, &ctx.aead, &ctx.md, tx_secret, + NGTCP2_CRYPTO_INITIAL_SECRETLEN) != 0) { + return -1; + } + + if (ngtcp2_crypto_aead_ctx_decrypt_init(&rx_aead_ctx, &ctx.aead, rx_key, + NGTCP2_CRYPTO_INITIAL_IVLEN) != 0) { + goto fail; + } + + if (ngtcp2_crypto_cipher_ctx_encrypt_init(&rx_hp_ctx, &ctx.hp, rx_hp_key) != + 0) { + goto fail; + } + + if (ngtcp2_crypto_aead_ctx_encrypt_init(&tx_aead_ctx, &ctx.aead, tx_key, + NGTCP2_CRYPTO_INITIAL_IVLEN) != 0) { + goto fail; + } + + if (ngtcp2_crypto_cipher_ctx_encrypt_init(&tx_hp_ctx, &ctx.hp, tx_hp_key) != + 0) { + goto fail; + } + + if (!server && !ngtcp2_conn_after_retry(conn)) { + ngtcp2_crypto_aead_retry(&retry_aead); + + switch (version) { + case NGTCP2_PROTO_VER_V1: + retry_key = (const uint8_t *)NGTCP2_RETRY_KEY_V1; + retry_noncelen = sizeof(NGTCP2_RETRY_NONCE_V1) - 1; + break; + case NGTCP2_PROTO_VER_V2: + retry_key = (const uint8_t *)NGTCP2_RETRY_KEY_V2; + retry_noncelen = sizeof(NGTCP2_RETRY_NONCE_V2) - 1; + break; + default: + retry_key = (const uint8_t *)NGTCP2_RETRY_KEY_DRAFT; + retry_noncelen = sizeof(NGTCP2_RETRY_NONCE_DRAFT) - 1; + } + + if (ngtcp2_crypto_aead_ctx_encrypt_init(&retry_aead_ctx, &retry_aead, + retry_key, retry_noncelen) != 0) { + goto fail; + } + } + + rv = ngtcp2_conn_install_initial_key(conn, &rx_aead_ctx, rx_iv, &rx_hp_ctx, + &tx_aead_ctx, tx_iv, &tx_hp_ctx, + NGTCP2_CRYPTO_INITIAL_IVLEN); + if (rv != 0) { + goto fail; + } + + if (retry_aead_ctx.native_handle) { + ngtcp2_conn_set_retry_aead(conn, &retry_aead, &retry_aead_ctx); + } + + return 0; + +fail: + ngtcp2_crypto_aead_ctx_free(&retry_aead_ctx); + ngtcp2_crypto_cipher_ctx_free(&tx_hp_ctx); + ngtcp2_crypto_aead_ctx_free(&tx_aead_ctx); + ngtcp2_crypto_cipher_ctx_free(&rx_hp_ctx); + ngtcp2_crypto_aead_ctx_free(&rx_aead_ctx); + + return -1; +} + +int ngtcp2_crypto_derive_and_install_vneg_initial_key( + ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret, + uint8_t *initial_secret, uint8_t *rx_key, uint8_t *rx_iv, + uint8_t *rx_hp_key, uint8_t *tx_key, uint8_t *tx_iv, uint8_t *tx_hp_key, + uint32_t version, const ngtcp2_cid *client_dcid) { + uint8_t rx_secretbuf[NGTCP2_CRYPTO_INITIAL_SECRETLEN]; + uint8_t tx_secretbuf[NGTCP2_CRYPTO_INITIAL_SECRETLEN]; + uint8_t initial_secretbuf[NGTCP2_CRYPTO_INITIAL_SECRETLEN]; + uint8_t rx_keybuf[NGTCP2_CRYPTO_INITIAL_KEYLEN]; + uint8_t rx_ivbuf[NGTCP2_CRYPTO_INITIAL_IVLEN]; + uint8_t rx_hp_keybuf[NGTCP2_CRYPTO_INITIAL_KEYLEN]; + uint8_t tx_keybuf[NGTCP2_CRYPTO_INITIAL_KEYLEN]; + uint8_t tx_ivbuf[NGTCP2_CRYPTO_INITIAL_IVLEN]; + uint8_t tx_hp_keybuf[NGTCP2_CRYPTO_INITIAL_KEYLEN]; + const ngtcp2_crypto_ctx *ctx = ngtcp2_conn_get_initial_crypto_ctx(conn); + ngtcp2_crypto_aead_ctx rx_aead_ctx = {0}; + ngtcp2_crypto_cipher_ctx rx_hp_ctx = {0}; + ngtcp2_crypto_aead_ctx tx_aead_ctx = {0}; + ngtcp2_crypto_cipher_ctx tx_hp_ctx = {0}; + int rv; + int server = ngtcp2_conn_is_server(conn); + + if (!rx_secret) { + rx_secret = rx_secretbuf; + } + if (!tx_secret) { + tx_secret = tx_secretbuf; + } + if (!initial_secret) { + initial_secret = initial_secretbuf; + } + + if (!rx_key) { + rx_key = rx_keybuf; + } + if (!rx_iv) { + rx_iv = rx_ivbuf; + } + if (!rx_hp_key) { + rx_hp_key = rx_hp_keybuf; + } + if (!tx_key) { + tx_key = tx_keybuf; + } + if (!tx_iv) { + tx_iv = tx_ivbuf; + } + if (!tx_hp_key) { + tx_hp_key = tx_hp_keybuf; + } + + if (ngtcp2_crypto_derive_initial_secrets( + version, rx_secret, tx_secret, initial_secret, client_dcid, + server ? NGTCP2_CRYPTO_SIDE_SERVER : NGTCP2_CRYPTO_SIDE_CLIENT) != + 0) { + return -1; + } + + if (ngtcp2_crypto_derive_packet_protection_key( + rx_key, rx_iv, rx_hp_key, version, &ctx->aead, &ctx->md, rx_secret, + NGTCP2_CRYPTO_INITIAL_SECRETLEN) != 0) { + return -1; + } + + if (ngtcp2_crypto_derive_packet_protection_key( + tx_key, tx_iv, tx_hp_key, version, &ctx->aead, &ctx->md, tx_secret, + NGTCP2_CRYPTO_INITIAL_SECRETLEN) != 0) { + return -1; + } + + if (ngtcp2_crypto_aead_ctx_decrypt_init(&rx_aead_ctx, &ctx->aead, rx_key, + NGTCP2_CRYPTO_INITIAL_IVLEN) != 0) { + goto fail; + } + + if (ngtcp2_crypto_cipher_ctx_encrypt_init(&rx_hp_ctx, &ctx->hp, rx_hp_key) != + 0) { + goto fail; + } + + if (ngtcp2_crypto_aead_ctx_encrypt_init(&tx_aead_ctx, &ctx->aead, tx_key, + NGTCP2_CRYPTO_INITIAL_IVLEN) != 0) { + goto fail; + } + + if (ngtcp2_crypto_cipher_ctx_encrypt_init(&tx_hp_ctx, &ctx->hp, tx_hp_key) != + 0) { + goto fail; + } + + rv = ngtcp2_conn_install_vneg_initial_key( + conn, version, &rx_aead_ctx, rx_iv, &rx_hp_ctx, &tx_aead_ctx, tx_iv, + &tx_hp_ctx, NGTCP2_CRYPTO_INITIAL_IVLEN); + if (rv != 0) { + goto fail; + } + + return 0; + +fail: + ngtcp2_crypto_cipher_ctx_free(&tx_hp_ctx); + ngtcp2_crypto_aead_ctx_free(&tx_aead_ctx); + ngtcp2_crypto_cipher_ctx_free(&rx_hp_ctx); + ngtcp2_crypto_aead_ctx_free(&rx_aead_ctx); + + return -1; +} + +int ngtcp2_crypto_update_key( + ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret, + ngtcp2_crypto_aead_ctx *rx_aead_ctx, uint8_t *rx_key, uint8_t *rx_iv, + ngtcp2_crypto_aead_ctx *tx_aead_ctx, uint8_t *tx_key, uint8_t *tx_iv, + const uint8_t *current_rx_secret, const uint8_t *current_tx_secret, + size_t secretlen) { + const ngtcp2_crypto_ctx *ctx = ngtcp2_conn_get_crypto_ctx(conn); + const ngtcp2_crypto_aead *aead = &ctx->aead; + const ngtcp2_crypto_md *md = &ctx->md; + size_t ivlen = ngtcp2_crypto_packet_protection_ivlen(aead); + uint32_t version = ngtcp2_conn_get_negotiated_version(conn); + + if (ngtcp2_crypto_update_traffic_secret(rx_secret, md, current_rx_secret, + secretlen) != 0) { + return -1; + } + + if (ngtcp2_crypto_derive_packet_protection_key( + rx_key, rx_iv, NULL, version, aead, md, rx_secret, secretlen) != 0) { + return -1; + } + + if (ngtcp2_crypto_update_traffic_secret(tx_secret, md, current_tx_secret, + secretlen) != 0) { + return -1; + } + + if (ngtcp2_crypto_derive_packet_protection_key( + tx_key, tx_iv, NULL, version, aead, md, tx_secret, secretlen) != 0) { + return -1; + } + + if (ngtcp2_crypto_aead_ctx_decrypt_init(rx_aead_ctx, aead, rx_key, ivlen) != + 0) { + return -1; + } + + if (ngtcp2_crypto_aead_ctx_encrypt_init(tx_aead_ctx, aead, tx_key, ivlen) != + 0) { + ngtcp2_crypto_aead_ctx_free(rx_aead_ctx); + rx_aead_ctx->native_handle = NULL; + return -1; + } + + return 0; +} + +int ngtcp2_crypto_encrypt_cb(uint8_t *dest, const ngtcp2_crypto_aead *aead, + const ngtcp2_crypto_aead_ctx *aead_ctx, + const uint8_t *plaintext, size_t plaintextlen, + const uint8_t *nonce, size_t noncelen, + const uint8_t *aad, size_t aadlen) { + if (ngtcp2_crypto_encrypt(dest, aead, aead_ctx, plaintext, plaintextlen, + nonce, noncelen, aad, aadlen) != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + return 0; +} + +int ngtcp2_crypto_decrypt_cb(uint8_t *dest, const ngtcp2_crypto_aead *aead, + const ngtcp2_crypto_aead_ctx *aead_ctx, + const uint8_t *ciphertext, size_t ciphertextlen, + const uint8_t *nonce, size_t noncelen, + const uint8_t *aad, size_t aadlen) { + if (ngtcp2_crypto_decrypt(dest, aead, aead_ctx, ciphertext, ciphertextlen, + nonce, noncelen, aad, aadlen) != 0) { + return NGTCP2_ERR_DECRYPT; + } + return 0; +} + +int ngtcp2_crypto_hp_mask_cb(uint8_t *dest, const ngtcp2_crypto_cipher *hp, + const ngtcp2_crypto_cipher_ctx *hp_ctx, + const uint8_t *sample) { + if (ngtcp2_crypto_hp_mask(dest, hp, hp_ctx, sample) != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + return 0; +} + +int ngtcp2_crypto_update_key_cb( + ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret, + ngtcp2_crypto_aead_ctx *rx_aead_ctx, uint8_t *rx_iv, + ngtcp2_crypto_aead_ctx *tx_aead_ctx, uint8_t *tx_iv, + const uint8_t *current_rx_secret, const uint8_t *current_tx_secret, + size_t secretlen, void *user_data) { + uint8_t rx_key[64]; + uint8_t tx_key[64]; + (void)conn; + (void)user_data; + + if (ngtcp2_crypto_update_key(conn, rx_secret, tx_secret, rx_aead_ctx, rx_key, + rx_iv, tx_aead_ctx, tx_key, tx_iv, + current_rx_secret, current_tx_secret, + secretlen) != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + return 0; +} + +int ngtcp2_crypto_generate_stateless_reset_token(uint8_t *token, + const uint8_t *secret, + size_t secretlen, + const ngtcp2_cid *cid) { + static const uint8_t info[] = "stateless_reset"; + ngtcp2_crypto_md md; + + if (ngtcp2_crypto_hkdf(token, NGTCP2_STATELESS_RESET_TOKENLEN, + ngtcp2_crypto_md_sha256(&md), secret, secretlen, + cid->data, cid->datalen, info, + sizeof(info) - 1) != 0) { + return -1; + } + + return 0; +} + +static int crypto_derive_token_key(uint8_t *key, size_t keylen, uint8_t *iv, + size_t ivlen, const ngtcp2_crypto_md *md, + const uint8_t *secret, size_t secretlen, + const uint8_t *salt, size_t saltlen, + const uint8_t *info_prefix, + size_t info_prefixlen) { + static const uint8_t key_info_suffix[] = " key"; + static const uint8_t iv_info_suffix[] = " iv"; + uint8_t intsecret[32]; + uint8_t info[32]; + uint8_t *p; + + assert(ngtcp2_crypto_md_hashlen(md) == sizeof(intsecret)); + assert(info_prefixlen + sizeof(key_info_suffix) - 1 <= sizeof(info)); + assert(info_prefixlen + sizeof(iv_info_suffix) - 1 <= sizeof(info)); + + if (ngtcp2_crypto_hkdf_extract(intsecret, md, secret, secretlen, salt, + saltlen) != 0) { + return -1; + } + + memcpy(info, info_prefix, info_prefixlen); + p = info + info_prefixlen; + + memcpy(p, key_info_suffix, sizeof(key_info_suffix) - 1); + p += sizeof(key_info_suffix) - 1; + + if (ngtcp2_crypto_hkdf_expand(key, keylen, md, intsecret, sizeof(intsecret), + info, (size_t)(p - info)) != 0) { + return -1; + } + + p = info + info_prefixlen; + + memcpy(p, iv_info_suffix, sizeof(iv_info_suffix) - 1); + p += sizeof(iv_info_suffix) - 1; + + if (ngtcp2_crypto_hkdf_expand(iv, ivlen, md, intsecret, sizeof(intsecret), + info, (size_t)(p - info)) != 0) { + return -1; + } + + return 0; +} + +static size_t crypto_generate_retry_token_aad(uint8_t *dest, uint32_t version, + const ngtcp2_sockaddr *sa, + ngtcp2_socklen salen, + const ngtcp2_cid *retry_scid) { + uint8_t *p = dest; + + version = ngtcp2_htonl(version); + memcpy(p, &version, sizeof(version)); + memcpy(p, sa, (size_t)salen); + p += salen; + memcpy(p, retry_scid->data, retry_scid->datalen); + p += retry_scid->datalen; + + return (size_t)(p - dest); +} + +static const uint8_t retry_token_info_prefix[] = "retry_token"; + +ngtcp2_ssize ngtcp2_crypto_generate_retry_token( + uint8_t *token, const uint8_t *secret, size_t secretlen, uint32_t version, + const ngtcp2_sockaddr *remote_addr, ngtcp2_socklen remote_addrlen, + const ngtcp2_cid *retry_scid, const ngtcp2_cid *odcid, ngtcp2_tstamp ts) { + uint8_t plaintext[NGTCP2_CRYPTO_MAX_RETRY_TOKENLEN]; + uint8_t rand_data[NGTCP2_CRYPTO_TOKEN_RAND_DATALEN]; + uint8_t key[32]; + uint8_t iv[32]; + size_t keylen; + size_t ivlen; + ngtcp2_crypto_aead aead; + ngtcp2_crypto_md md; + ngtcp2_crypto_aead_ctx aead_ctx; + size_t plaintextlen; + uint8_t aad[sizeof(version) + sizeof(ngtcp2_sockaddr_storage) + + NGTCP2_MAX_CIDLEN]; + size_t aadlen; + uint8_t *p = plaintext; + ngtcp2_tstamp ts_be = ngtcp2_htonl64(ts); + int rv; + + memset(plaintext, 0, sizeof(plaintext)); + + *p++ = (uint8_t)odcid->datalen; + memcpy(p, odcid->data, odcid->datalen); + p += NGTCP2_MAX_CIDLEN; + memcpy(p, &ts_be, sizeof(ts_be)); + p += sizeof(ts_be); + + plaintextlen = (size_t)(p - plaintext); + + if (ngtcp2_crypto_random(rand_data, sizeof(rand_data)) != 0) { + return -1; + } + + ngtcp2_crypto_aead_aes_128_gcm(&aead); + ngtcp2_crypto_md_sha256(&md); + + keylen = ngtcp2_crypto_aead_keylen(&aead); + ivlen = ngtcp2_crypto_aead_noncelen(&aead); + + assert(sizeof(key) >= keylen); + assert(sizeof(iv) >= ivlen); + + if (crypto_derive_token_key(key, keylen, iv, ivlen, &md, secret, secretlen, + rand_data, sizeof(rand_data), + retry_token_info_prefix, + sizeof(retry_token_info_prefix) - 1) != 0) { + return -1; + } + + aadlen = crypto_generate_retry_token_aad(aad, version, remote_addr, + remote_addrlen, retry_scid); + + p = token; + *p++ = NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY; + + if (ngtcp2_crypto_aead_ctx_encrypt_init(&aead_ctx, &aead, key, ivlen) != 0) { + return -1; + } + + rv = ngtcp2_crypto_encrypt(p, &aead, &aead_ctx, plaintext, plaintextlen, iv, + ivlen, aad, aadlen); + + ngtcp2_crypto_aead_ctx_free(&aead_ctx); + + if (rv != 0) { + return -1; + } + + p += plaintextlen + aead.max_overhead; + memcpy(p, rand_data, sizeof(rand_data)); + p += sizeof(rand_data); + + return p - token; +} + +int ngtcp2_crypto_verify_retry_token( + ngtcp2_cid *odcid, const uint8_t *token, size_t tokenlen, + const uint8_t *secret, size_t secretlen, uint32_t version, + const ngtcp2_sockaddr *remote_addr, ngtcp2_socklen remote_addrlen, + const ngtcp2_cid *dcid, ngtcp2_duration timeout, ngtcp2_tstamp ts) { + uint8_t + plaintext[/* cid len = */ 1 + NGTCP2_MAX_CIDLEN + sizeof(ngtcp2_tstamp)]; + uint8_t key[32]; + uint8_t iv[32]; + size_t keylen; + size_t ivlen; + ngtcp2_crypto_aead_ctx aead_ctx; + ngtcp2_crypto_aead aead; + ngtcp2_crypto_md md; + uint8_t aad[sizeof(version) + sizeof(ngtcp2_sockaddr_storage) + + NGTCP2_MAX_CIDLEN]; + size_t aadlen; + const uint8_t *rand_data; + const uint8_t *ciphertext; + size_t ciphertextlen; + size_t cil; + int rv; + ngtcp2_tstamp gen_ts; + + if (tokenlen != NGTCP2_CRYPTO_MAX_RETRY_TOKENLEN || + token[0] != NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY) { + return -1; + } + + rand_data = token + tokenlen - NGTCP2_CRYPTO_TOKEN_RAND_DATALEN; + ciphertext = token + 1; + ciphertextlen = tokenlen - 1 - NGTCP2_CRYPTO_TOKEN_RAND_DATALEN; + + ngtcp2_crypto_aead_aes_128_gcm(&aead); + ngtcp2_crypto_md_sha256(&md); + + keylen = ngtcp2_crypto_aead_keylen(&aead); + ivlen = ngtcp2_crypto_aead_noncelen(&aead); + + if (crypto_derive_token_key(key, keylen, iv, ivlen, &md, secret, secretlen, + rand_data, NGTCP2_CRYPTO_TOKEN_RAND_DATALEN, + retry_token_info_prefix, + sizeof(retry_token_info_prefix) - 1) != 0) { + return -1; + } + + aadlen = crypto_generate_retry_token_aad(aad, version, remote_addr, + remote_addrlen, dcid); + + if (ngtcp2_crypto_aead_ctx_decrypt_init(&aead_ctx, &aead, key, ivlen) != 0) { + return -1; + } + + rv = ngtcp2_crypto_decrypt(plaintext, &aead, &aead_ctx, ciphertext, + ciphertextlen, iv, ivlen, aad, aadlen); + + ngtcp2_crypto_aead_ctx_free(&aead_ctx); + + if (rv != 0) { + return -1; + } + + cil = plaintext[0]; + + assert(cil == 0 || (cil >= NGTCP2_MIN_CIDLEN && cil <= NGTCP2_MAX_CIDLEN)); + + memcpy(&gen_ts, plaintext + /* cid len = */ 1 + NGTCP2_MAX_CIDLEN, + sizeof(gen_ts)); + + gen_ts = ngtcp2_ntohl64(gen_ts); + if (gen_ts + timeout <= ts) { + return -1; + } + + ngtcp2_cid_init(odcid, plaintext + /* cid len = */ 1, cil); + + return 0; +} + +static size_t crypto_generate_regular_token_aad(uint8_t *dest, + const ngtcp2_sockaddr *sa) { + const uint8_t *addr; + size_t addrlen; + + switch (sa->sa_family) { + case AF_INET: + addr = (const uint8_t *)&((const ngtcp2_sockaddr_in *)(void *)sa)->sin_addr; + addrlen = sizeof(((const ngtcp2_sockaddr_in *)(void *)sa)->sin_addr); + break; + case AF_INET6: + addr = + (const uint8_t *)&((const ngtcp2_sockaddr_in6 *)(void *)sa)->sin6_addr; + addrlen = sizeof(((const ngtcp2_sockaddr_in6 *)(void *)sa)->sin6_addr); + break; + default: + assert(0); + abort(); + } + + memcpy(dest, addr, addrlen); + + return addrlen; +} + +static const uint8_t regular_token_info_prefix[] = "regular_token"; + +ngtcp2_ssize ngtcp2_crypto_generate_regular_token( + uint8_t *token, const uint8_t *secret, size_t secretlen, + const ngtcp2_sockaddr *remote_addr, ngtcp2_socklen remote_addrlen, + ngtcp2_tstamp ts) { + uint8_t plaintext[sizeof(ngtcp2_tstamp)]; + uint8_t rand_data[NGTCP2_CRYPTO_TOKEN_RAND_DATALEN]; + uint8_t key[32]; + uint8_t iv[32]; + size_t keylen; + size_t ivlen; + ngtcp2_crypto_aead aead; + ngtcp2_crypto_md md; + ngtcp2_crypto_aead_ctx aead_ctx; + size_t plaintextlen; + uint8_t aad[sizeof(ngtcp2_sockaddr_in6)]; + size_t aadlen; + uint8_t *p = plaintext; + ngtcp2_tstamp ts_be = ngtcp2_htonl64(ts); + int rv; + (void)remote_addrlen; + + memcpy(p, &ts_be, sizeof(ts_be)); + p += sizeof(ts_be); + + plaintextlen = (size_t)(p - plaintext); + + if (ngtcp2_crypto_random(rand_data, sizeof(rand_data)) != 0) { + return -1; + } + + ngtcp2_crypto_aead_aes_128_gcm(&aead); + ngtcp2_crypto_md_sha256(&md); + + keylen = ngtcp2_crypto_aead_keylen(&aead); + ivlen = ngtcp2_crypto_aead_noncelen(&aead); + + assert(sizeof(key) >= keylen); + assert(sizeof(iv) >= ivlen); + + if (crypto_derive_token_key(key, keylen, iv, ivlen, &md, secret, secretlen, + rand_data, sizeof(rand_data), + regular_token_info_prefix, + sizeof(regular_token_info_prefix) - 1) != 0) { + return -1; + } + + aadlen = crypto_generate_regular_token_aad(aad, remote_addr); + + p = token; + *p++ = NGTCP2_CRYPTO_TOKEN_MAGIC_REGULAR; + + if (ngtcp2_crypto_aead_ctx_encrypt_init(&aead_ctx, &aead, key, ivlen) != 0) { + return -1; + } + + rv = ngtcp2_crypto_encrypt(p, &aead, &aead_ctx, plaintext, plaintextlen, iv, + ivlen, aad, aadlen); + + ngtcp2_crypto_aead_ctx_free(&aead_ctx); + + if (rv != 0) { + return -1; + } + + p += plaintextlen + aead.max_overhead; + memcpy(p, rand_data, sizeof(rand_data)); + p += sizeof(rand_data); + + return p - token; +} + +int ngtcp2_crypto_verify_regular_token(const uint8_t *token, size_t tokenlen, + const uint8_t *secret, size_t secretlen, + const ngtcp2_sockaddr *remote_addr, + ngtcp2_socklen remote_addrlen, + ngtcp2_duration timeout, + ngtcp2_tstamp ts) { + uint8_t plaintext[sizeof(ngtcp2_tstamp)]; + uint8_t key[32]; + uint8_t iv[32]; + size_t keylen; + size_t ivlen; + ngtcp2_crypto_aead_ctx aead_ctx; + ngtcp2_crypto_aead aead; + ngtcp2_crypto_md md; + uint8_t aad[sizeof(ngtcp2_sockaddr_in6)]; + size_t aadlen; + const uint8_t *rand_data; + const uint8_t *ciphertext; + size_t ciphertextlen; + int rv; + ngtcp2_tstamp gen_ts; + (void)remote_addrlen; + + if (tokenlen != NGTCP2_CRYPTO_MAX_REGULAR_TOKENLEN || + token[0] != NGTCP2_CRYPTO_TOKEN_MAGIC_REGULAR) { + return -1; + } + + rand_data = token + tokenlen - NGTCP2_CRYPTO_TOKEN_RAND_DATALEN; + ciphertext = token + 1; + ciphertextlen = tokenlen - 1 - NGTCP2_CRYPTO_TOKEN_RAND_DATALEN; + + ngtcp2_crypto_aead_aes_128_gcm(&aead); + ngtcp2_crypto_md_sha256(&md); + + keylen = ngtcp2_crypto_aead_keylen(&aead); + ivlen = ngtcp2_crypto_aead_noncelen(&aead); + + if (crypto_derive_token_key(key, keylen, iv, ivlen, &md, secret, secretlen, + rand_data, NGTCP2_CRYPTO_TOKEN_RAND_DATALEN, + regular_token_info_prefix, + sizeof(regular_token_info_prefix) - 1) != 0) { + return -1; + } + + aadlen = crypto_generate_regular_token_aad(aad, remote_addr); + + if (ngtcp2_crypto_aead_ctx_decrypt_init(&aead_ctx, &aead, key, ivlen) != 0) { + return -1; + } + + rv = ngtcp2_crypto_decrypt(plaintext, &aead, &aead_ctx, ciphertext, + ciphertextlen, iv, ivlen, aad, aadlen); + + ngtcp2_crypto_aead_ctx_free(&aead_ctx); + + if (rv != 0) { + return -1; + } + + memcpy(&gen_ts, plaintext, sizeof(gen_ts)); + + gen_ts = ngtcp2_ntohl64(gen_ts); + if (gen_ts + timeout <= ts) { + return -1; + } + + return 0; +} + +ngtcp2_ssize ngtcp2_crypto_write_connection_close( + uint8_t *dest, size_t destlen, uint32_t version, const ngtcp2_cid *dcid, + const ngtcp2_cid *scid, uint64_t error_code, const uint8_t *reason, + size_t reasonlen) { + uint8_t rx_secret[NGTCP2_CRYPTO_INITIAL_SECRETLEN]; + uint8_t tx_secret[NGTCP2_CRYPTO_INITIAL_SECRETLEN]; + uint8_t initial_secret[NGTCP2_CRYPTO_INITIAL_SECRETLEN]; + uint8_t tx_key[NGTCP2_CRYPTO_INITIAL_KEYLEN]; + uint8_t tx_iv[NGTCP2_CRYPTO_INITIAL_IVLEN]; + uint8_t tx_hp_key[NGTCP2_CRYPTO_INITIAL_KEYLEN]; + ngtcp2_crypto_ctx ctx; + ngtcp2_ssize spktlen; + ngtcp2_crypto_aead_ctx aead_ctx = {0}; + ngtcp2_crypto_cipher_ctx hp_ctx = {0}; + + ngtcp2_crypto_ctx_initial(&ctx); + + if (ngtcp2_crypto_derive_initial_secrets(version, rx_secret, tx_secret, + initial_secret, scid, + NGTCP2_CRYPTO_SIDE_SERVER) != 0) { + return -1; + } + + if (ngtcp2_crypto_derive_packet_protection_key( + tx_key, tx_iv, tx_hp_key, version, &ctx.aead, &ctx.md, tx_secret, + NGTCP2_CRYPTO_INITIAL_SECRETLEN) != 0) { + return -1; + } + + if (ngtcp2_crypto_aead_ctx_encrypt_init(&aead_ctx, &ctx.aead, tx_key, + NGTCP2_CRYPTO_INITIAL_IVLEN) != 0) { + spktlen = -1; + goto end; + } + + if (ngtcp2_crypto_cipher_ctx_encrypt_init(&hp_ctx, &ctx.hp, tx_hp_key) != 0) { + spktlen = -1; + goto end; + } + + spktlen = ngtcp2_pkt_write_connection_close( + dest, destlen, version, dcid, scid, error_code, reason, reasonlen, + ngtcp2_crypto_encrypt_cb, &ctx.aead, &aead_ctx, tx_iv, + ngtcp2_crypto_hp_mask_cb, &ctx.hp, &hp_ctx); + if (spktlen < 0) { + spktlen = -1; + } + +end: + ngtcp2_crypto_cipher_ctx_free(&hp_ctx); + ngtcp2_crypto_aead_ctx_free(&aead_ctx); + + return spktlen; +} + +ngtcp2_ssize ngtcp2_crypto_write_retry(uint8_t *dest, size_t destlen, + uint32_t version, const ngtcp2_cid *dcid, + const ngtcp2_cid *scid, + const ngtcp2_cid *odcid, + const uint8_t *token, size_t tokenlen) { + ngtcp2_crypto_aead aead; + ngtcp2_ssize spktlen; + ngtcp2_crypto_aead_ctx aead_ctx = {0}; + const uint8_t *key; + size_t noncelen; + + ngtcp2_crypto_aead_retry(&aead); + + switch (version) { + case NGTCP2_PROTO_VER_V1: + key = (const uint8_t *)NGTCP2_RETRY_KEY_V1; + noncelen = sizeof(NGTCP2_RETRY_NONCE_V1) - 1; + break; + case NGTCP2_PROTO_VER_V2: + key = (const uint8_t *)NGTCP2_RETRY_KEY_V2; + noncelen = sizeof(NGTCP2_RETRY_NONCE_V2) - 1; + break; + default: + key = (const uint8_t *)NGTCP2_RETRY_KEY_DRAFT; + noncelen = sizeof(NGTCP2_RETRY_NONCE_DRAFT) - 1; + } + + if (ngtcp2_crypto_aead_ctx_encrypt_init(&aead_ctx, &aead, key, noncelen) != + 0) { + return -1; + } + + spktlen = ngtcp2_pkt_write_retry(dest, destlen, version, dcid, scid, odcid, + token, tokenlen, ngtcp2_crypto_encrypt_cb, + &aead, &aead_ctx); + if (spktlen < 0) { + spktlen = -1; + } + + ngtcp2_crypto_aead_ctx_free(&aead_ctx); + + return spktlen; +} + +int ngtcp2_crypto_client_initial_cb(ngtcp2_conn *conn, void *user_data) { + const ngtcp2_cid *dcid = ngtcp2_conn_get_dcid(conn); + void *tls = ngtcp2_conn_get_tls_native_handle(conn); + (void)user_data; + + if (ngtcp2_crypto_derive_and_install_initial_key( + conn, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + ngtcp2_conn_get_client_chosen_version(conn), dcid) != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + if (crypto_set_local_transport_params(conn, tls) != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + if (ngtcp2_crypto_read_write_crypto_data(conn, NGTCP2_CRYPTO_LEVEL_INITIAL, + NULL, 0) != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +int ngtcp2_crypto_recv_retry_cb(ngtcp2_conn *conn, const ngtcp2_pkt_hd *hd, + void *user_data) { + (void)user_data; + + if (ngtcp2_crypto_derive_and_install_initial_key( + conn, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + ngtcp2_conn_get_client_chosen_version(conn), &hd->scid) != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +int ngtcp2_crypto_recv_client_initial_cb(ngtcp2_conn *conn, + const ngtcp2_cid *dcid, + void *user_data) { + (void)user_data; + + if (ngtcp2_crypto_derive_and_install_initial_key( + conn, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + ngtcp2_conn_get_client_chosen_version(conn), dcid) != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +int ngtcp2_crypto_version_negotiation_cb(ngtcp2_conn *conn, uint32_t version, + const ngtcp2_cid *client_dcid, + void *user_data) { + (void)user_data; + + if (ngtcp2_crypto_derive_and_install_vneg_initial_key( + conn, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, version, + client_dcid) != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +void ngtcp2_crypto_delete_crypto_aead_ctx_cb(ngtcp2_conn *conn, + ngtcp2_crypto_aead_ctx *aead_ctx, + void *user_data) { + (void)conn; + (void)user_data; + + ngtcp2_crypto_aead_ctx_free(aead_ctx); +} + +void ngtcp2_crypto_delete_crypto_cipher_ctx_cb( + ngtcp2_conn *conn, ngtcp2_crypto_cipher_ctx *cipher_ctx, void *user_data) { + (void)conn; + (void)user_data; + + ngtcp2_crypto_cipher_ctx_free(cipher_ctx); +} + +int ngtcp2_crypto_recv_crypto_data_cb(ngtcp2_conn *conn, + ngtcp2_crypto_level crypto_level, + uint64_t offset, const uint8_t *data, + size_t datalen, void *user_data) { + int rv; + (void)offset; + (void)user_data; + + if (ngtcp2_crypto_read_write_crypto_data(conn, crypto_level, data, datalen) != + 0) { + rv = ngtcp2_conn_get_tls_error(conn); + if (rv) { + return rv; + } + return NGTCP2_ERR_CRYPTO; + } + + return 0; +} diff --git a/src/contrib/libngtcp2/ngtcp2/crypto/shared.h b/src/contrib/libngtcp2/ngtcp2/crypto/shared.h new file mode 100644 index 0000000..c7690c1 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/crypto/shared.h @@ -0,0 +1,350 @@ +/* + * ngtcp2 + * + * Copyright (c) 2019 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_SHARED_H +#define NGTCP2_SHARED_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2_crypto.h> + +/** + * @macro + * + * :macro:`NGTCP2_INITIAL_SALT_DRAFT` is a salt value which is used to + * derive initial secret. It is used for QUIC draft versions. + */ +#define NGTCP2_INITIAL_SALT_DRAFT \ + "\xaf\xbf\xec\x28\x99\x93\xd2\x4c\x9e\x97\x86\xf1\x9c\x61\x11\xe0\x43\x90" \ + "\xa8\x99" + +/** + * @macro + * + * :macro:`NGTCP2_INITIAL_SALT_V1` is a salt value which is used to + * derive initial secret. It is used for QUIC v1. + */ +#define NGTCP2_INITIAL_SALT_V1 \ + "\x38\x76\x2c\xf7\xf5\x59\x34\xb3\x4d\x17\x9a\xe6\xa4\xc8\x0c\xad\xcc\xbb" \ + "\x7f\x0a" + +/** + * @macro + * + * :macro:`NGTCP2_INITIAL_SALT_V2` is a salt value which is used to + * derive initial secret. It is used for QUIC v2. + */ +#define NGTCP2_INITIAL_SALT_V2 \ + "\x0d\xed\xe3\xde\xf7\x00\xa6\xdb\x81\x93\x81\xbe\x6e\x26\x9d\xcb\xf9\xbd" \ + "\x2e\xd9" + +/* Maximum key usage (encryption) limits */ +#define NGTCP2_CRYPTO_MAX_ENCRYPTION_AES_GCM (1ULL << 23) +#define NGTCP2_CRYPTO_MAX_ENCRYPTION_CHACHA20_POLY1305 (1ULL << 62) +#define NGTCP2_CRYPTO_MAX_ENCRYPTION_AES_CCM (2965820ULL) + +/* Maximum authentication failure (decryption) limits during the + lifetime of a connection. */ +#define NGTCP2_CRYPTO_MAX_DECRYPTION_FAILURE_AES_GCM (1ULL << 52) +#define NGTCP2_CRYPTO_MAX_DECRYPTION_FAILURE_CHACHA20_POLY1305 (1ULL << 36) +#define NGTCP2_CRYPTO_MAX_DECRYPTION_FAILURE_AES_CCM (2965820ULL) + +/** + * @function + * + * `ngtcp2_crypto_ctx_initial` initializes |ctx| for Initial packet + * encryption and decryption. + */ +ngtcp2_crypto_ctx *ngtcp2_crypto_ctx_initial(ngtcp2_crypto_ctx *ctx); + +/** + * @function + * + * `ngtcp2_crypto_aead_init` initializes |aead| with the provided + * |aead_native_handle| which is an underlying AEAD object. + * + * If libngtcp2_crypto_openssl is linked, |aead_native_handle| must be + * a pointer to EVP_CIPHER. + * + * If libngtcp2_crypto_gnutls is linked, |aead_native_handle| must be + * gnutls_cipher_algorithm_t casted to ``void *``. + * + * If libngtcp2_crypto_boringssl is linked, |aead_native_handle| must + * be a pointer to EVP_AEAD. + */ +ngtcp2_crypto_aead *ngtcp2_crypto_aead_init(ngtcp2_crypto_aead *aead, + void *aead_native_handle); + +/** + * @function + * + * `ngtcp2_crypto_aead_retry` initializes |aead| with the AEAD cipher + * AEAD_AES_128_GCM for Retry packet integrity protection. + */ +ngtcp2_crypto_aead *ngtcp2_crypto_aead_retry(ngtcp2_crypto_aead *aead); + +/** + * @function + * + * `ngtcp2_crypto_derive_initial_secrets` derives initial secrets. + * |rx_secret| and |tx_secret| must point to the buffer of at least 32 + * bytes capacity. rx for read and tx for write. This function + * writes rx and tx secrets into |rx_secret| and |tx_secret| + * respectively. The length of secret is 32 bytes long. + * |client_dcid| is the destination connection ID in first Initial + * packet of client. If |initial_secret| is not NULL, the initial + * secret is written to it. It must point to the buffer which has at + * least 32 bytes capacity. The initial secret is 32 bytes long. + * |side| specifies the side of application. + * + * This function returns 0 if it succeeds, or -1. + */ +int ngtcp2_crypto_derive_initial_secrets(uint32_t version, uint8_t *rx_secret, + uint8_t *tx_secret, + uint8_t *initial_secret, + const ngtcp2_cid *client_dcid, + ngtcp2_crypto_side side); + +/** + * @function + * + * `ngtcp2_crypto_derive_packet_protection_key` derives packet + * protection key. This function writes packet protection key into + * the buffer pointed by |key|. The length of derived key is + * `ngtcp2_crypto_aead_keylen(aead) <ngtcp2_crypto_aead_keylen>` + * bytes. |key| must have enough capacity to store the key. This + * function writes packet protection IV into |iv|. The length of + * derived IV is `ngtcp2_crypto_packet_protection_ivlen(aead) + * <ngtcp2_crypto_packet_protection_ivlen>` bytes. |iv| must have + * enough capacity to store the IV. + * + * If |hp| is not NULL, this function also derives packet header + * protection key and writes the key into the buffer pointed by |hp|. + * The length of derived key is `ngtcp2_crypto_aead_keylen(aead) + * <ngtcp2_crypto_aead_keylen>` bytes. |hp|, if not NULL, must have + * enough capacity to store the key. + * + * This function returns 0 if it succeeds, or -1. + */ +int ngtcp2_crypto_derive_packet_protection_key(uint8_t *key, uint8_t *iv, + uint8_t *hp, uint32_t version, + const ngtcp2_crypto_aead *aead, + const ngtcp2_crypto_md *md, + const uint8_t *secret, + size_t secretlen); + +/** + * @function + * + * `ngtcp2_crypto_update_traffic_secret` derives the next generation + * of the traffic secret. |secret| specifies the current secret and + * its length is given in |secretlen|. The length of new key is the + * same as the current key. This function writes new key into the + * buffer pointed by |dest|. |dest| must have the enough capacity to + * store the new key. + * + * This function returns 0 if it succeeds, or -1. + */ +int ngtcp2_crypto_update_traffic_secret(uint8_t *dest, + const ngtcp2_crypto_md *md, + const uint8_t *secret, + size_t secretlen); + +/** + * @function + * + * `ngtcp2_crypto_set_local_transport_params` sets QUIC transport + * parameter, which is encoded in wire format and stored in the buffer + * pointed by |buf| of length |len|, to the native handle |tls|. + * + * |tls| points to a implementation dependent TLS session object. If + * libngtcp2_crypto_openssl is linked, |tls| must be a pointer to SSL + * object. + * + * This function returns 0 if it succeeds, or -1. + */ +int ngtcp2_crypto_set_local_transport_params(void *tls, const uint8_t *buf, + size_t len); + +/** + * @function + * + * `ngtcp2_crypto_set_remote_transport_params` retrieves a remote QUIC + * transport parameters from |tls| and sets it to |conn| using + * `ngtcp2_conn_set_remote_transport_params`. + * + * |tls| points to a implementation dependent TLS session object. If + * libngtcp2_crypto_openssl is linked, |tls| must be a pointer to SSL + * object. + * + * This function returns 0 if it succeeds, or -1. + */ +int ngtcp2_crypto_set_remote_transport_params(ngtcp2_conn *conn, void *tls); + +/** + * @function + * + * `ngtcp2_crypto_derive_and_install_initial_key` derives initial + * keying materials and installs keys to |conn|. + * + * If |rx_secret| is not NULL, the secret for decryption is written to + * the buffer pointed by |rx_secret|. The length of secret is 32 + * bytes, and |rx_secret| must point to the buffer which has enough + * capacity. + * + * If |tx_secret| is not NULL, the secret for encryption is written to + * the buffer pointed by |tx_secret|. The length of secret is 32 + * bytes, and |tx_secret| must point to the buffer which has enough + * capacity. + * + * If |initial_secret| is not NULL, the initial secret is written to + * the buffer pointed by |initial_secret|. The length of secret is 32 + * bytes, and |initial_secret| must point to the buffer which has + * enough capacity. + * + * |client_dcid| is the destination connection ID in first Initial + * packet of client. + * + * If |rx_key| is not NULL, the derived packet protection key for + * decryption is written to the buffer pointed by |rx_key|. If + * |rx_iv| is not NULL, the derived packet protection IV for + * decryption is written to the buffer pointed by |rx_iv|. If |rx_hp| + * is not NULL, the derived header protection key for decryption is + * written to the buffer pointed by |rx_hp|. + * + * If |tx_key| is not NULL, the derived packet protection key for + * encryption is written to the buffer pointed by |tx_key|. If + * |tx_iv| is not NULL, the derived packet protection IV for + * encryption is written to the buffer pointed by |tx_iv|. If |tx_hp| + * is not NULL, the derived header protection key for encryption is + * written to the buffer pointed by |tx_hp|. + * + * The length of packet protection key and header protection key is 16 + * bytes long. The length of packet protection IV is 12 bytes long. + * + * This function calls `ngtcp2_conn_set_initial_crypto_ctx` to set + * initial AEAD and message digest algorithm. After the successful + * call of this function, application can use + * `ngtcp2_conn_get_initial_crypto_ctx` to get the object. + * + * This function returns 0 if it succeeds, or -1. + */ +int ngtcp2_crypto_derive_and_install_initial_key( + ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret, + uint8_t *initial_secret, uint8_t *rx_key, uint8_t *rx_iv, uint8_t *rx_hp, + uint8_t *tx_key, uint8_t *tx_iv, uint8_t *tx_hp, uint32_t version, + const ngtcp2_cid *client_dcid); + +/** + * @function + * + * `ngtcp2_crypto_derive_and_install_vneg_initial_key` derives initial + * keying materials and installs keys to |conn|. This function is + * dedicated to install keys for |version| which is negotiated, or + * being negotiated. + * + * If |rx_secret| is not NULL, the secret for decryption is written to + * the buffer pointed by |rx_secret|. The length of secret is 32 + * bytes, and |rx_secret| must point to the buffer which has enough + * capacity. + * + * If |tx_secret| is not NULL, the secret for encryption is written to + * the buffer pointed by |tx_secret|. The length of secret is 32 + * bytes, and |tx_secret| must point to the buffer which has enough + * capacity. + * + * If |initial_secret| is not NULL, the initial secret is written to + * the buffer pointed by |initial_secret|. The length of secret is 32 + * bytes, and |initial_secret| must point to the buffer which has + * enough capacity. + * + * |client_dcid| is the destination connection ID in first Initial + * packet of client. + * + * If |rx_key| is not NULL, the derived packet protection key for + * decryption is written to the buffer pointed by |rx_key|. If + * |rx_iv| is not NULL, the derived packet protection IV for + * decryption is written to the buffer pointed by |rx_iv|. If |rx_hp| + * is not NULL, the derived header protection key for decryption is + * written to the buffer pointed by |rx_hp|. + * + * If |tx_key| is not NULL, the derived packet protection key for + * encryption is written to the buffer pointed by |tx_key|. If + * |tx_iv| is not NULL, the derived packet protection IV for + * encryption is written to the buffer pointed by |tx_iv|. If |tx_hp| + * is not NULL, the derived header protection key for encryption is + * written to the buffer pointed by |tx_hp|. + * + * The length of packet protection key and header protection key is 16 + * bytes long. The length of packet protection IV is 12 bytes long. + * + * This function returns 0 if it succeeds, or -1. + */ +int ngtcp2_crypto_derive_and_install_vneg_initial_key( + ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret, + uint8_t *initial_secret, uint8_t *rx_key, uint8_t *rx_iv, uint8_t *rx_hp, + uint8_t *tx_key, uint8_t *tx_iv, uint8_t *tx_hp, uint32_t version, + const ngtcp2_cid *client_dcid); + +/** + * @function + * + * `ngtcp2_crypto_cipher_ctx_encrypt_init` initializes |cipher_ctx| + * with new cipher context object for encryption which is constructed + * to use |key| as encryption key. |cipher| specifies cipher to use. + * + * This function returns 0 if it succeeds, or -1. + */ +int ngtcp2_crypto_cipher_ctx_encrypt_init(ngtcp2_crypto_cipher_ctx *cipher_ctx, + const ngtcp2_crypto_cipher *cipher, + const uint8_t *key); + +/** + * @function + * + * `ngtcp2_crypto_cipher_ctx_free` frees up resources used by + * |cipher_ctx|. This function does not free the memory pointed by + * |cipher_ctx| itself. + */ +void ngtcp2_crypto_cipher_ctx_free(ngtcp2_crypto_cipher_ctx *cipher_ctx); + +/* + * `ngtcp2_crypto_md_sha256` initializes |md| with SHA256 message + * digest algorithm and returns |md|. + */ +ngtcp2_crypto_md *ngtcp2_crypto_md_sha256(ngtcp2_crypto_md *md); + +ngtcp2_crypto_aead *ngtcp2_crypto_aead_aes_128_gcm(ngtcp2_crypto_aead *aead); + +/* + * `ngtcp2_crypto_random` writes cryptographically-secure random + * |datalen| bytes into the buffer pointed by |data|. + * + * This function returns 0 if it succeeds, or -1. + */ +int ngtcp2_crypto_random(uint8_t *data, size_t datalen); + +#endif /* NGTCP2_SHARED_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_acktr.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_acktr.c new file mode 100644 index 0000000..3f1f9b3 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_acktr.c @@ -0,0 +1,335 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_acktr.h" + +#include <assert.h> + +#include "ngtcp2_macro.h" + +static void acktr_entry_init(ngtcp2_acktr_entry *ent, int64_t pkt_num, + ngtcp2_tstamp tstamp) { + ent->pkt_num = pkt_num; + ent->len = 1; + ent->tstamp = tstamp; +} + +int ngtcp2_acktr_entry_objalloc_new(ngtcp2_acktr_entry **ent, int64_t pkt_num, + ngtcp2_tstamp tstamp, + ngtcp2_objalloc *objalloc) { + *ent = ngtcp2_objalloc_acktr_entry_get(objalloc); + if (*ent == NULL) { + return NGTCP2_ERR_NOMEM; + } + + acktr_entry_init(*ent, pkt_num, tstamp); + + return 0; +} + +void ngtcp2_acktr_entry_objalloc_del(ngtcp2_acktr_entry *ent, + ngtcp2_objalloc *objalloc) { + ngtcp2_objalloc_acktr_entry_release(objalloc, ent); +} + +static int greater(const ngtcp2_ksl_key *lhs, const ngtcp2_ksl_key *rhs) { + return *(int64_t *)lhs > *(int64_t *)rhs; +} + +int ngtcp2_acktr_init(ngtcp2_acktr *acktr, ngtcp2_log *log, + const ngtcp2_mem *mem) { + int rv; + + ngtcp2_objalloc_acktr_entry_init(&acktr->objalloc, 32, mem); + + rv = ngtcp2_ringbuf_init(&acktr->acks, 32, sizeof(ngtcp2_acktr_ack_entry), + mem); + if (rv != 0) { + return rv; + } + + ngtcp2_ksl_init(&acktr->ents, greater, sizeof(int64_t), mem); + + acktr->log = log; + acktr->mem = mem; + acktr->flags = NGTCP2_ACKTR_FLAG_NONE; + acktr->first_unacked_ts = UINT64_MAX; + acktr->rx_npkt = 0; + + return 0; +} + +void ngtcp2_acktr_free(ngtcp2_acktr *acktr) { +#ifdef NOMEMPOOL + ngtcp2_ksl_it it; +#endif /* NOMEMPOOL */ + + if (acktr == NULL) { + return; + } + +#ifdef NOMEMPOOL + for (it = ngtcp2_ksl_begin(&acktr->ents); !ngtcp2_ksl_it_end(&it); + ngtcp2_ksl_it_next(&it)) { + ngtcp2_acktr_entry_objalloc_del(ngtcp2_ksl_it_get(&it), &acktr->objalloc); + } +#endif /* NOMEMPOOL */ + + ngtcp2_ksl_free(&acktr->ents); + + ngtcp2_ringbuf_free(&acktr->acks); + + ngtcp2_objalloc_free(&acktr->objalloc); +} + +int ngtcp2_acktr_add(ngtcp2_acktr *acktr, int64_t pkt_num, int active_ack, + ngtcp2_tstamp ts) { + ngtcp2_ksl_it it, prev_it; + ngtcp2_acktr_entry *ent, *prev_ent, *delent; + int rv; + int added = 0; + + if (ngtcp2_ksl_len(&acktr->ents)) { + it = ngtcp2_ksl_lower_bound(&acktr->ents, &pkt_num); + if (ngtcp2_ksl_it_end(&it)) { + ngtcp2_ksl_it_prev(&it); + ent = ngtcp2_ksl_it_get(&it); + + assert(ent->pkt_num >= pkt_num + (int64_t)ent->len); + + if (ent->pkt_num == pkt_num + (int64_t)ent->len) { + ++ent->len; + added = 1; + } + } else { + ent = ngtcp2_ksl_it_get(&it); + + assert(ent->pkt_num != pkt_num); + + if (ngtcp2_ksl_it_begin(&it)) { + if (ent->pkt_num + 1 == pkt_num) { + ngtcp2_ksl_update_key(&acktr->ents, &ent->pkt_num, &pkt_num); + ent->pkt_num = pkt_num; + ent->tstamp = ts; + ++ent->len; + added = 1; + } + } else { + prev_it = it; + ngtcp2_ksl_it_prev(&prev_it); + prev_ent = ngtcp2_ksl_it_get(&prev_it); + + assert(prev_ent->pkt_num >= pkt_num + (int64_t)prev_ent->len); + + if (ent->pkt_num + 1 == pkt_num) { + if (prev_ent->pkt_num == pkt_num + (int64_t)prev_ent->len) { + prev_ent->len += ent->len + 1; + ngtcp2_ksl_remove_hint(&acktr->ents, NULL, &it, &ent->pkt_num); + ngtcp2_acktr_entry_objalloc_del(ent, &acktr->objalloc); + added = 1; + } else { + ngtcp2_ksl_update_key(&acktr->ents, &ent->pkt_num, &pkt_num); + ent->pkt_num = pkt_num; + ent->tstamp = ts; + ++ent->len; + added = 1; + } + } else if (prev_ent->pkt_num == pkt_num + (int64_t)prev_ent->len) { + ++prev_ent->len; + added = 1; + } + } + } + } + + if (!added) { + rv = ngtcp2_acktr_entry_objalloc_new(&ent, pkt_num, ts, &acktr->objalloc); + if (rv != 0) { + return rv; + } + rv = ngtcp2_ksl_insert(&acktr->ents, NULL, &ent->pkt_num, ent); + if (rv != 0) { + ngtcp2_acktr_entry_objalloc_del(ent, &acktr->objalloc); + return rv; + } + } + + if (active_ack) { + acktr->flags |= NGTCP2_ACKTR_FLAG_ACTIVE_ACK; + if (acktr->first_unacked_ts == UINT64_MAX) { + acktr->first_unacked_ts = ts; + } + } + + if (ngtcp2_ksl_len(&acktr->ents) > NGTCP2_ACKTR_MAX_ENT) { + it = ngtcp2_ksl_end(&acktr->ents); + ngtcp2_ksl_it_prev(&it); + delent = ngtcp2_ksl_it_get(&it); + ngtcp2_ksl_remove_hint(&acktr->ents, NULL, &it, &delent->pkt_num); + ngtcp2_acktr_entry_objalloc_del(delent, &acktr->objalloc); + } + + return 0; +} + +void ngtcp2_acktr_forget(ngtcp2_acktr *acktr, ngtcp2_acktr_entry *ent) { + ngtcp2_ksl_it it; + + it = ngtcp2_ksl_lower_bound(&acktr->ents, &ent->pkt_num); + assert(*(int64_t *)ngtcp2_ksl_it_key(&it) == (int64_t)ent->pkt_num); + + for (; !ngtcp2_ksl_it_end(&it);) { + ent = ngtcp2_ksl_it_get(&it); + ngtcp2_ksl_remove_hint(&acktr->ents, &it, &it, &ent->pkt_num); + ngtcp2_acktr_entry_objalloc_del(ent, &acktr->objalloc); + } +} + +ngtcp2_ksl_it ngtcp2_acktr_get(ngtcp2_acktr *acktr) { + return ngtcp2_ksl_begin(&acktr->ents); +} + +int ngtcp2_acktr_empty(ngtcp2_acktr *acktr) { + ngtcp2_ksl_it it = ngtcp2_ksl_begin(&acktr->ents); + return ngtcp2_ksl_it_end(&it); +} + +ngtcp2_acktr_ack_entry *ngtcp2_acktr_add_ack(ngtcp2_acktr *acktr, + int64_t pkt_num, + int64_t largest_ack) { + ngtcp2_acktr_ack_entry *ent = ngtcp2_ringbuf_push_front(&acktr->acks); + + ent->largest_ack = largest_ack; + ent->pkt_num = pkt_num; + + return ent; +} + +/* + * acktr_remove removes |ent| from |acktr|. |it| must point to the + * node whose key identifies |ent|. The iterator which points to the + * entry next to |ent| is assigned to |it|. + */ +static void acktr_remove(ngtcp2_acktr *acktr, ngtcp2_ksl_it *it, + ngtcp2_acktr_entry *ent) { + ngtcp2_ksl_remove_hint(&acktr->ents, it, it, &ent->pkt_num); + ngtcp2_acktr_entry_objalloc_del(ent, &acktr->objalloc); +} + +static void acktr_on_ack(ngtcp2_acktr *acktr, ngtcp2_ringbuf *rb, + size_t ack_ent_offset) { + ngtcp2_acktr_ack_entry *ack_ent; + ngtcp2_acktr_entry *ent; + ngtcp2_ksl_it it; + + assert(ngtcp2_ringbuf_len(rb)); + + ack_ent = ngtcp2_ringbuf_get(rb, ack_ent_offset); + + /* Assume that ngtcp2_pkt_validate_ack(fr) returns 0 */ + it = ngtcp2_ksl_lower_bound(&acktr->ents, &ack_ent->largest_ack); + for (; !ngtcp2_ksl_it_end(&it);) { + ent = ngtcp2_ksl_it_get(&it); + acktr_remove(acktr, &it, ent); + } + + if (ngtcp2_ksl_len(&acktr->ents)) { + assert(ngtcp2_ksl_it_end(&it)); + + ngtcp2_ksl_it_prev(&it); + ent = ngtcp2_ksl_it_get(&it); + if (ent->pkt_num > ack_ent->largest_ack && + ack_ent->largest_ack >= ent->pkt_num - (int64_t)(ent->len - 1)) { + ent->len = (size_t)(ent->pkt_num - ack_ent->largest_ack); + } + } + + ngtcp2_ringbuf_resize(rb, ack_ent_offset); +} + +void ngtcp2_acktr_recv_ack(ngtcp2_acktr *acktr, const ngtcp2_ack *fr) { + ngtcp2_acktr_ack_entry *ent; + int64_t largest_ack = fr->largest_ack, min_ack; + size_t i, j; + ngtcp2_ringbuf *rb = &acktr->acks; + size_t nacks = ngtcp2_ringbuf_len(rb); + + /* Assume that ngtcp2_pkt_validate_ack(fr) returns 0 */ + for (j = 0; j < nacks; ++j) { + ent = ngtcp2_ringbuf_get(rb, j); + if (largest_ack >= ent->pkt_num) { + break; + } + } + if (j == nacks) { + return; + } + + min_ack = largest_ack - (int64_t)fr->first_ack_range; + + if (min_ack <= ent->pkt_num && ent->pkt_num <= largest_ack) { + acktr_on_ack(acktr, rb, j); + return; + } + + for (i = 0; i < fr->rangecnt && j < nacks; ++i) { + largest_ack = min_ack - (int64_t)fr->ranges[i].gap - 2; + min_ack = largest_ack - (int64_t)fr->ranges[i].len; + + for (;;) { + if (ent->pkt_num > largest_ack) { + ++j; + if (j == nacks) { + return; + } + ent = ngtcp2_ringbuf_get(rb, j); + continue; + } + if (ent->pkt_num < min_ack) { + break; + } + acktr_on_ack(acktr, rb, j); + return; + } + } +} + +void ngtcp2_acktr_commit_ack(ngtcp2_acktr *acktr) { + acktr->flags &= (uint16_t) ~(NGTCP2_ACKTR_FLAG_ACTIVE_ACK | + NGTCP2_ACKTR_FLAG_IMMEDIATE_ACK | + NGTCP2_ACKTR_FLAG_CANCEL_TIMER); + acktr->first_unacked_ts = UINT64_MAX; + acktr->rx_npkt = 0; +} + +int ngtcp2_acktr_require_active_ack(ngtcp2_acktr *acktr, + ngtcp2_duration max_ack_delay, + ngtcp2_tstamp ts) { + return acktr->first_unacked_ts != UINT64_MAX && + acktr->first_unacked_ts + max_ack_delay <= ts; +} + +void ngtcp2_acktr_immediate_ack(ngtcp2_acktr *acktr) { + acktr->flags |= NGTCP2_ACKTR_FLAG_IMMEDIATE_ACK; +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_acktr.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_acktr.h new file mode 100644 index 0000000..70a3c71 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_acktr.h @@ -0,0 +1,221 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_ACKTR_H +#define NGTCP2_ACKTR_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +#include "ngtcp2_mem.h" +#include "ngtcp2_ringbuf.h" +#include "ngtcp2_ksl.h" +#include "ngtcp2_pkt.h" +#include "ngtcp2_objalloc.h" + +/* NGTCP2_ACKTR_MAX_ENT is the maximum number of ngtcp2_acktr_entry + which ngtcp2_acktr stores. */ +#define NGTCP2_ACKTR_MAX_ENT 1024 + +typedef struct ngtcp2_log ngtcp2_log; + +/* + * ngtcp2_acktr_entry is a range of packets which need to be acked. + */ +typedef struct ngtcp2_acktr_entry { + union { + struct { + /* pkt_num is the largest packet number to acknowledge in this + range. */ + int64_t pkt_num; + /* len is the consecutive packets started from pkt_num which + includes pkt_num itself counting in decreasing order. So pkt_num + = 987 and len = 2, this entry includes packet 987 and 986. */ + size_t len; + /* tstamp is the timestamp when a packet denoted by pkt_num is + received. */ + ngtcp2_tstamp tstamp; + }; + + ngtcp2_opl_entry oplent; + }; +} ngtcp2_acktr_entry; + +ngtcp2_objalloc_def(acktr_entry, ngtcp2_acktr_entry, oplent); + +/* + * ngtcp2_acktr_entry_objalloc_new allocates memory for ent, and + * initializes it with the given parameters. The pointer to the + * allocated object is stored to |*ent|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +int ngtcp2_acktr_entry_objalloc_new(ngtcp2_acktr_entry **ent, int64_t pkt_num, + ngtcp2_tstamp tstamp, + ngtcp2_objalloc *objalloc); + +/* + * ngtcp2_acktr_entry_objalloc_del deallocates memory allocated for + * |ent|. + */ +void ngtcp2_acktr_entry_objalloc_del(ngtcp2_acktr_entry *ent, + ngtcp2_objalloc *objalloc); + +typedef struct ngtcp2_acktr_ack_entry { + /* largest_ack is the largest packet number in outgoing ACK frame */ + int64_t largest_ack; + /* pkt_num is the packet number that ACK frame is included. */ + int64_t pkt_num; +} ngtcp2_acktr_ack_entry; + +/* NGTCP2_ACKTR_FLAG_NONE indicates that no flag set. */ +#define NGTCP2_ACKTR_FLAG_NONE 0x00u +/* NGTCP2_ACKTR_FLAG_IMMEDIATE_ACK indicates that immediate + acknowledgement is required. */ +#define NGTCP2_ACKTR_FLAG_IMMEDIATE_ACK 0x01u +/* NGTCP2_ACKTR_FLAG_ACTIVE_ACK indicates that there are pending + protected packet to be acknowledged. */ +#define NGTCP2_ACKTR_FLAG_ACTIVE_ACK 0x02u +/* NGTCP2_ACKTR_FLAG_CANCEL_TIMER is set when ACK delay timer is + expired and canceled. */ +#define NGTCP2_ACKTR_FLAG_CANCEL_TIMER 0x0100u + +/* + * ngtcp2_acktr tracks received packets which we have to send ack. + */ +typedef struct ngtcp2_acktr { + ngtcp2_objalloc objalloc; + ngtcp2_ringbuf acks; + /* ents includes ngtcp2_acktr_entry sorted by decreasing order of + packet number. */ + ngtcp2_ksl ents; + ngtcp2_log *log; + const ngtcp2_mem *mem; + /* flags is bitwise OR of zero, or more of NGTCP2_ACKTR_FLAG_*. */ + uint16_t flags; + /* first_unacked_ts is timestamp when ngtcp2_acktr_entry is added + first time after the last outgoing ACK frame. */ + ngtcp2_tstamp first_unacked_ts; + /* rx_npkt is the number of ACK eliciting packets received without + sending ACK. */ + size_t rx_npkt; +} ngtcp2_acktr; + +/* + * ngtcp2_acktr_init initializes |acktr|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +int ngtcp2_acktr_init(ngtcp2_acktr *acktr, ngtcp2_log *log, + const ngtcp2_mem *mem); + +/* + * ngtcp2_acktr_free frees resources allocated for |acktr|. It frees + * any ngtcp2_acktr_entry added to |acktr|. + */ +void ngtcp2_acktr_free(ngtcp2_acktr *acktr); + +/* + * ngtcp2_acktr_add adds packet number |pkt_num| to |acktr|. + * |active_ack| is nonzero if |pkt_num| is retransmittable packet. + * + * This function assumes that |acktr| does not contain |pkt_num|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * OUt of memory. + */ +int ngtcp2_acktr_add(ngtcp2_acktr *acktr, int64_t pkt_num, int active_ack, + ngtcp2_tstamp ts); + +/* + * ngtcp2_acktr_forget removes all entries which have the packet + * number that is equal to or less than ent->pkt_num. This function + * assumes that |acktr| includes |ent|. + */ +void ngtcp2_acktr_forget(ngtcp2_acktr *acktr, ngtcp2_acktr_entry *ent); + +/* + * ngtcp2_acktr_get returns the pointer to pointer to the entry which + * has the largest packet number to be acked. If there is no entry, + * returned value satisfies ngtcp2_ksl_it_end(&it) != 0. + */ +ngtcp2_ksl_it ngtcp2_acktr_get(ngtcp2_acktr *acktr); + +/* + * ngtcp2_acktr_empty returns nonzero if it has no packet to + * acknowledge. + */ +int ngtcp2_acktr_empty(ngtcp2_acktr *acktr); + +/* + * ngtcp2_acktr_add_ack records outgoing ACK frame whose largest + * acknowledged packet number is |largest_ack|. |pkt_num| is the + * packet number of a packet in which ACK frame is included. This + * function returns a pointer to the object it adds. + */ +ngtcp2_acktr_ack_entry * +ngtcp2_acktr_add_ack(ngtcp2_acktr *acktr, int64_t pkt_num, int64_t largest_ack); + +/* + * ngtcp2_acktr_recv_ack processes the incoming ACK frame |fr|. + * |pkt_num| is a packet number which includes |fr|. If we receive + * ACK which acknowledges the ACKs added by ngtcp2_acktr_add_ack, + * ngtcp2_acktr_entry which the outgoing ACK acknowledges is removed. + */ +void ngtcp2_acktr_recv_ack(ngtcp2_acktr *acktr, const ngtcp2_ack *fr); + +/* + * ngtcp2_acktr_commit_ack tells |acktr| that ACK frame is generated. + */ +void ngtcp2_acktr_commit_ack(ngtcp2_acktr *acktr); + +/* + * ngtcp2_acktr_require_active_ack returns nonzero if ACK frame should + * be generated actively. + */ +int ngtcp2_acktr_require_active_ack(ngtcp2_acktr *acktr, + ngtcp2_duration max_ack_delay, + ngtcp2_tstamp ts); + +/* + * ngtcp2_acktr_immediate_ack tells |acktr| that immediate + * acknowledgement is required. + */ +void ngtcp2_acktr_immediate_ack(ngtcp2_acktr *acktr); + +#endif /* NGTCP2_ACKTR_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_addr.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_addr.c new file mode 100644 index 0000000..f389abe --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_addr.c @@ -0,0 +1,117 @@ +/* + * ngtcp2 + * + * Copyright (c) 2019 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_addr.h" + +#include <string.h> +#include <assert.h> + +#include "ngtcp2_unreachable.h" + +ngtcp2_addr *ngtcp2_addr_init(ngtcp2_addr *dest, const ngtcp2_sockaddr *addr, + ngtcp2_socklen addrlen) { + dest->addrlen = addrlen; + dest->addr = (ngtcp2_sockaddr *)addr; + return dest; +} + +void ngtcp2_addr_copy(ngtcp2_addr *dest, const ngtcp2_addr *src) { + dest->addrlen = src->addrlen; + if (src->addrlen) { + memcpy(dest->addr, src->addr, (size_t)src->addrlen); + } +} + +void ngtcp2_addr_copy_byte(ngtcp2_addr *dest, const ngtcp2_sockaddr *addr, + ngtcp2_socklen addrlen) { + dest->addrlen = addrlen; + if (addrlen) { + memcpy(dest->addr, addr, (size_t)addrlen); + } +} + +static int sockaddr_eq(const ngtcp2_sockaddr *a, const ngtcp2_sockaddr *b) { + assert(a->sa_family == b->sa_family); + + switch (a->sa_family) { + case NGTCP2_AF_INET: { + const ngtcp2_sockaddr_in *ai = (const ngtcp2_sockaddr_in *)(void *)a, + *bi = (const ngtcp2_sockaddr_in *)(void *)b; + return ai->sin_port == bi->sin_port && + memcmp(&ai->sin_addr, &bi->sin_addr, sizeof(ai->sin_addr)) == 0; + } + case NGTCP2_AF_INET6: { + const ngtcp2_sockaddr_in6 *ai = (const ngtcp2_sockaddr_in6 *)(void *)a, + *bi = (const ngtcp2_sockaddr_in6 *)(void *)b; + return ai->sin6_port == bi->sin6_port && + memcmp(&ai->sin6_addr, &bi->sin6_addr, sizeof(ai->sin6_addr)) == 0; + } + default: + ngtcp2_unreachable(); + } +} + +int ngtcp2_addr_eq(const ngtcp2_addr *a, const ngtcp2_addr *b) { + return a->addr->sa_family == b->addr->sa_family && + sockaddr_eq(a->addr, b->addr); +} + +uint32_t ngtcp2_addr_compare(const ngtcp2_addr *aa, const ngtcp2_addr *bb) { + uint32_t flags = NGTCP2_ADDR_COMPARE_FLAG_NONE; + const ngtcp2_sockaddr *a = aa->addr; + const ngtcp2_sockaddr *b = bb->addr; + + if (a->sa_family != b->sa_family) { + return NGTCP2_ADDR_COMPARE_FLAG_FAMILY; + } + + switch (a->sa_family) { + case NGTCP2_AF_INET: { + const ngtcp2_sockaddr_in *ai = (const ngtcp2_sockaddr_in *)(void *)a, + *bi = (const ngtcp2_sockaddr_in *)(void *)b; + if (memcmp(&ai->sin_addr, &bi->sin_addr, sizeof(ai->sin_addr))) { + flags |= NGTCP2_ADDR_COMPARE_FLAG_ADDR; + } + if (ai->sin_port != bi->sin_port) { + flags |= NGTCP2_ADDR_COMPARE_FLAG_PORT; + } + return flags; + } + case NGTCP2_AF_INET6: { + const ngtcp2_sockaddr_in6 *ai = (const ngtcp2_sockaddr_in6 *)(void *)a, + *bi = (const ngtcp2_sockaddr_in6 *)(void *)b; + if (memcmp(&ai->sin6_addr, &bi->sin6_addr, sizeof(ai->sin6_addr))) { + flags |= NGTCP2_ADDR_COMPARE_FLAG_ADDR; + } + if (ai->sin6_port != bi->sin6_port) { + flags |= NGTCP2_ADDR_COMPARE_FLAG_PORT; + } + return flags; + } + default: + ngtcp2_unreachable(); + } +} + +int ngtcp2_addr_empty(const ngtcp2_addr *addr) { return addr->addrlen == 0; } diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_addr.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_addr.h new file mode 100644 index 0000000..f1d7f7b --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_addr.h @@ -0,0 +1,69 @@ +/* + * ngtcp2 + * + * Copyright (c) 2019 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_ADDR_H +#define NGTCP2_ADDR_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +/* + * ngtcp2_addr_copy copies |src| to |dest|. This function assumes + * that dest->addr points to a buffer which have sufficient size to + * store the copy. + */ +void ngtcp2_addr_copy(ngtcp2_addr *dest, const ngtcp2_addr *src); + +/* + * ngtcp2_addr_eq returns nonzero if |a| equals |b|. + */ +int ngtcp2_addr_eq(const ngtcp2_addr *a, const ngtcp2_addr *b); + +/* NGTCP2_ADDR_COMPARE_FLAG_NONE indicates that no flag set. */ +#define NGTCP2_ADDR_COMPARE_FLAG_NONE 0x0u +/* NGTCP2_ADDR_COMPARE_FLAG_ADDR indicates IP addresses do not + match. */ +#define NGTCP2_ADDR_COMPARE_FLAG_ADDR 0x1u +/* NGTCP2_ADDR_COMPARE_FLAG_PORT indicates ports do not match. */ +#define NGTCP2_ADDR_COMPARE_FLAG_PORT 0x2u +/* NGTCP2_ADDR_COMPARE_FLAG_FAMILY indicates address families do not + match. */ +#define NGTCP2_ADDR_COMPARE_FLAG_FAMILY 0x4u + +/* + * ngtcp2_addr_compare compares address and port between |a| and |b|, + * and returns zero or more of NGTCP2_ADDR_COMPARE_FLAG_*. + */ +uint32_t ngtcp2_addr_compare(const ngtcp2_addr *a, const ngtcp2_addr *b); + +/* + * ngtcp2_addr_empty returns nonzero if |addr| has zero length + * address. + */ +int ngtcp2_addr_empty(const ngtcp2_addr *addr); + +#endif /* NGTCP2_ADDR_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_balloc.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_balloc.c new file mode 100644 index 0000000..5cc39ee --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_balloc.c @@ -0,0 +1,90 @@ +/* + * ngtcp2 + * + * Copyright (c) 2022 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_balloc.h" + +#include <assert.h> + +#include "ngtcp2_mem.h" + +void ngtcp2_balloc_init(ngtcp2_balloc *balloc, size_t blklen, + const ngtcp2_mem *mem) { + assert((blklen & 0xfu) == 0); + + balloc->mem = mem; + balloc->blklen = blklen; + balloc->head = NULL; + ngtcp2_buf_init(&balloc->buf, (void *)"", 0); +} + +void ngtcp2_balloc_free(ngtcp2_balloc *balloc) { + if (balloc == NULL) { + return; + } + + ngtcp2_balloc_clear(balloc); +} + +void ngtcp2_balloc_clear(ngtcp2_balloc *balloc) { + ngtcp2_memblock_hd *p, *next; + + for (p = balloc->head; p; p = next) { + next = p->next; + ngtcp2_mem_free(balloc->mem, p); + } + + balloc->head = NULL; + ngtcp2_buf_init(&balloc->buf, (void *)"", 0); +} + +int ngtcp2_balloc_get(ngtcp2_balloc *balloc, void **pbuf, size_t n) { + uint8_t *p; + ngtcp2_memblock_hd *hd; + + assert(n <= balloc->blklen); + + if (ngtcp2_buf_left(&balloc->buf) < n) { + p = ngtcp2_mem_malloc(balloc->mem, + sizeof(ngtcp2_memblock_hd) + 0x10u + balloc->blklen); + if (p == NULL) { + return NGTCP2_ERR_NOMEM; + } + + hd = (ngtcp2_memblock_hd *)(void *)p; + hd->next = balloc->head; + balloc->head = hd; + ngtcp2_buf_init( + &balloc->buf, + (uint8_t *)(((uintptr_t)p + sizeof(ngtcp2_memblock_hd) + 0xfu) & + ~(uintptr_t)0xfu), + balloc->blklen); + } + + assert(((uintptr_t)balloc->buf.last & 0xfu) == 0); + + *pbuf = balloc->buf.last; + balloc->buf.last += (n + 0xfu) & ~(uintptr_t)0xfu; + + return 0; +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_balloc.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_balloc.h new file mode 100644 index 0000000..1fb1632 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_balloc.h @@ -0,0 +1,91 @@ +/* + * ngtcp2 + * + * Copyright (c) 2022 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_BALLOC_H +#define NGTCP2_BALLOC_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +#include "ngtcp2_buf.h" + +typedef struct ngtcp2_memblock_hd ngtcp2_memblock_hd; + +/* + * ngtcp2_memblock_hd is the header of memory block. + */ +struct ngtcp2_memblock_hd { + ngtcp2_memblock_hd *next; +}; + +/* + * ngtcp2_balloc is a custom memory allocator. It allocates |blklen| + * bytes of memory at once on demand, and returns its slice when the + * allocation is requested. + */ +typedef struct ngtcp2_balloc { + /* mem is the underlying memory allocator. */ + const ngtcp2_mem *mem; + /* blklen is the size of memory block. */ + size_t blklen; + /* head points to the list of memory block allocated so far. */ + ngtcp2_memblock_hd *head; + /* buf wraps the current memory block for allocation requests. */ + ngtcp2_buf buf; +} ngtcp2_balloc; + +/* + * ngtcp2_balloc_init initializes |balloc| with |blklen| which is the + * size of memory block. + */ +void ngtcp2_balloc_init(ngtcp2_balloc *balloc, size_t blklen, + const ngtcp2_mem *mem); + +/* + * ngtcp2_balloc_free releases all allocated memory blocks. + */ +void ngtcp2_balloc_free(ngtcp2_balloc *balloc); + +/* + * ngtcp2_balloc_get allocates |n| bytes of memory and assigns its + * pointer to |*pbuf|. + * + * It returns 0 if it succeeds, or one of the following negative error + * codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +int ngtcp2_balloc_get(ngtcp2_balloc *balloc, void **pbuf, size_t n); + +/* + * ngtcp2_balloc_clear releases all allocated memory blocks and + * initializes its state. + */ +void ngtcp2_balloc_clear(ngtcp2_balloc *balloc); + +#endif /* NGTCP2_BALLOC_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr.c new file mode 100644 index 0000000..ed26d3e --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr.c @@ -0,0 +1,692 @@ +/* + * ngtcp2 + * + * Copyright (c) 2021 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_bbr.h" + +#include <assert.h> + +#include "ngtcp2_log.h" +#include "ngtcp2_macro.h" +#include "ngtcp2_mem.h" +#include "ngtcp2_rcvry.h" +#include "ngtcp2_rst.h" + +static const double pacing_gain_cycle[] = {1.25, 0.75, 1, 1, 1, 1, 1, 1}; + +#define NGTCP2_BBR_GAIN_CYCLELEN ngtcp2_arraylen(pacing_gain_cycle) + +#define NGTCP2_BBR_HIGH_GAIN 2.89 +#define NGTCP2_BBR_PROBE_RTT_DURATION (200 * NGTCP2_MILLISECONDS) +#define NGTCP2_RTPROP_FILTERLEN (10 * NGTCP2_SECONDS) +#define NGTCP2_BBR_BTL_BW_FILTERLEN 10 + +static void bbr_update_on_ack(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts); +static void bbr_update_model_and_state(ngtcp2_bbr_cc *cc, + ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack, + ngtcp2_tstamp ts); +static void bbr_update_control_parameters(ngtcp2_bbr_cc *cc, + ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack); +static void bbr_on_transmit(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat); +static void bbr_init_round_counting(ngtcp2_bbr_cc *cc); +static void bbr_update_round(ngtcp2_bbr_cc *cc, const ngtcp2_cc_ack *ack); +static void bbr_update_btl_bw(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack); +static void bbr_update_rtprop(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts); +static void bbr_init_pacing_rate(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat); +static void bbr_set_pacing_rate_with_gain(ngtcp2_bbr_cc *cc, + ngtcp2_conn_stat *cstat, + double pacing_gain); +static void bbr_set_pacing_rate(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat); +static void bbr_set_send_quantum(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat); +static void bbr_update_target_cwnd(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat); +static void bbr_modulate_cwnd_for_recovery(ngtcp2_bbr_cc *cc, + ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack); +static void bbr_save_cwnd(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat); +static void bbr_restore_cwnd(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat); +static void bbr_modulate_cwnd_for_probe_rtt(ngtcp2_bbr_cc *cc, + ngtcp2_conn_stat *cstat); +static void bbr_set_cwnd(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack); +static void bbr_init(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp initial_ts); +static void bbr_enter_startup(ngtcp2_bbr_cc *cc); +static void bbr_init_full_pipe(ngtcp2_bbr_cc *cc); +static void bbr_check_full_pipe(ngtcp2_bbr_cc *cc); +static void bbr_enter_drain(ngtcp2_bbr_cc *cc); +static void bbr_check_drain(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts); +static void bbr_enter_probe_bw(ngtcp2_bbr_cc *cc, ngtcp2_tstamp ts); +static void bbr_check_cycle_phase(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts); +static void bbr_advance_cycle_phase(ngtcp2_bbr_cc *cc, ngtcp2_tstamp ts); +static int bbr_is_next_cycle_phase(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts); +static void bbr_handle_restart_from_idle(ngtcp2_bbr_cc *cc, + ngtcp2_conn_stat *cstat); +static void bbr_check_probe_rtt(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts); +static void bbr_enter_probe_rtt(ngtcp2_bbr_cc *cc); +static void bbr_handle_probe_rtt(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts); +static void bbr_exit_probe_rtt(ngtcp2_bbr_cc *cc, ngtcp2_tstamp ts); + +void ngtcp2_bbr_cc_init(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat, + ngtcp2_rst *rst, ngtcp2_tstamp initial_ts, + ngtcp2_rand rand, const ngtcp2_rand_ctx *rand_ctx, + ngtcp2_log *log) { + cc->ccb.log = log; + cc->rst = rst; + cc->rand = rand; + cc->rand_ctx = *rand_ctx; + cc->initial_cwnd = cstat->cwnd; + bbr_init(cc, cstat, initial_ts); +} + +void ngtcp2_bbr_cc_free(ngtcp2_bbr_cc *cc) { (void)cc; } + +int ngtcp2_cc_bbr_cc_init(ngtcp2_cc *cc, ngtcp2_log *log, + ngtcp2_conn_stat *cstat, ngtcp2_rst *rst, + ngtcp2_tstamp initial_ts, ngtcp2_rand rand, + const ngtcp2_rand_ctx *rand_ctx, + const ngtcp2_mem *mem) { + ngtcp2_bbr_cc *bbr_cc; + + bbr_cc = ngtcp2_mem_calloc(mem, 1, sizeof(ngtcp2_bbr_cc)); + if (bbr_cc == NULL) { + return NGTCP2_ERR_NOMEM; + } + + ngtcp2_bbr_cc_init(bbr_cc, cstat, rst, initial_ts, rand, rand_ctx, log); + + cc->ccb = &bbr_cc->ccb; + cc->on_pkt_acked = ngtcp2_cc_bbr_cc_on_pkt_acked; + cc->congestion_event = ngtcp2_cc_bbr_cc_congestion_event; + cc->on_spurious_congestion = ngtcp2_cc_bbr_cc_on_spurious_congestion; + cc->on_persistent_congestion = ngtcp2_cc_bbr_cc_on_persistent_congestion; + cc->on_ack_recv = ngtcp2_cc_bbr_cc_on_ack_recv; + cc->on_pkt_sent = ngtcp2_cc_bbr_cc_on_pkt_sent; + cc->new_rtt_sample = ngtcp2_cc_bbr_cc_new_rtt_sample; + cc->reset = ngtcp2_cc_bbr_cc_reset; + cc->event = ngtcp2_cc_bbr_cc_event; + + return 0; +} + +void ngtcp2_cc_bbr_cc_free(ngtcp2_cc *cc, const ngtcp2_mem *mem) { + ngtcp2_bbr_cc *bbr_cc = ngtcp2_struct_of(cc->ccb, ngtcp2_bbr_cc, ccb); + + ngtcp2_bbr_cc_free(bbr_cc); + ngtcp2_mem_free(mem, bbr_cc); +} + +void ngtcp2_cc_bbr_cc_on_pkt_acked(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_pkt *pkt, ngtcp2_tstamp ts) { + (void)ccx; + (void)cstat; + (void)pkt; + (void)ts; +} + +static int in_congestion_recovery(const ngtcp2_conn_stat *cstat, + ngtcp2_tstamp sent_time) { + return cstat->congestion_recovery_start_ts != UINT64_MAX && + sent_time <= cstat->congestion_recovery_start_ts; +} + +void ngtcp2_cc_bbr_cc_congestion_event(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp sent_ts, + ngtcp2_tstamp ts) { + ngtcp2_bbr_cc *cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_bbr_cc, ccb); + + if (cc->in_loss_recovery || cc->congestion_recovery_start_ts != UINT64_MAX || + in_congestion_recovery(cstat, sent_ts)) { + return; + } + + cc->congestion_recovery_start_ts = ts; +} + +void ngtcp2_cc_bbr_cc_on_spurious_congestion(ngtcp2_cc *ccx, + ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts) { + ngtcp2_bbr_cc *cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_bbr_cc, ccb); + (void)ts; + + cc->congestion_recovery_start_ts = UINT64_MAX; + cstat->congestion_recovery_start_ts = UINT64_MAX; + + if (cc->in_loss_recovery) { + cc->in_loss_recovery = 0; + cc->packet_conservation = 0; + bbr_restore_cwnd(cc, cstat); + } +} + +void ngtcp2_cc_bbr_cc_on_persistent_congestion(ngtcp2_cc *ccx, + ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts) { + ngtcp2_bbr_cc *cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_bbr_cc, ccb); + (void)ts; + + cstat->congestion_recovery_start_ts = UINT64_MAX; + cc->congestion_recovery_start_ts = UINT64_MAX; + cc->in_loss_recovery = 0; + cc->packet_conservation = 0; + + bbr_save_cwnd(cc, cstat); + cstat->cwnd = 2 * cstat->max_tx_udp_payload_size; +} + +void ngtcp2_cc_bbr_cc_on_ack_recv(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts) { + ngtcp2_bbr_cc *bbr_cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_bbr_cc, ccb); + + bbr_update_on_ack(bbr_cc, cstat, ack, ts); +} + +void ngtcp2_cc_bbr_cc_on_pkt_sent(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_pkt *pkt) { + ngtcp2_bbr_cc *bbr_cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_bbr_cc, ccb); + (void)pkt; + + bbr_on_transmit(bbr_cc, cstat); +} + +void ngtcp2_cc_bbr_cc_new_rtt_sample(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts) { + (void)ccx; + (void)cstat; + (void)ts; +} + +void ngtcp2_cc_bbr_cc_reset(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts) { + ngtcp2_bbr_cc *bbr_cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_bbr_cc, ccb); + bbr_init(bbr_cc, cstat, ts); +} + +void ngtcp2_cc_bbr_cc_event(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat, + ngtcp2_cc_event_type event, ngtcp2_tstamp ts) { + (void)ccx; + (void)cstat; + (void)event; + (void)ts; +} + +static void bbr_update_on_ack(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts) { + bbr_update_model_and_state(cc, cstat, ack, ts); + bbr_update_control_parameters(cc, cstat, ack); +} + +static void bbr_update_model_and_state(ngtcp2_bbr_cc *cc, + ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack, + ngtcp2_tstamp ts) { + bbr_update_btl_bw(cc, cstat, ack); + bbr_check_cycle_phase(cc, cstat, ack, ts); + bbr_check_full_pipe(cc); + bbr_check_drain(cc, cstat, ts); + bbr_update_rtprop(cc, cstat, ts); + bbr_check_probe_rtt(cc, cstat, ts); +} + +static void bbr_update_control_parameters(ngtcp2_bbr_cc *cc, + ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack) { + bbr_set_pacing_rate(cc, cstat); + bbr_set_send_quantum(cc, cstat); + bbr_set_cwnd(cc, cstat, ack); +} + +static void bbr_on_transmit(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat) { + bbr_handle_restart_from_idle(cc, cstat); +} + +static void bbr_init_round_counting(ngtcp2_bbr_cc *cc) { + cc->next_round_delivered = 0; + cc->round_start = 0; + cc->round_count = 0; +} + +static void bbr_update_round(ngtcp2_bbr_cc *cc, const ngtcp2_cc_ack *ack) { + if (ack->pkt_delivered >= cc->next_round_delivered) { + cc->next_round_delivered = cc->rst->delivered; + ++cc->round_count; + cc->round_start = 1; + + return; + } + + cc->round_start = 0; +} + +static void bbr_handle_recovery(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack) { + if (cc->in_loss_recovery) { + if (ack->pkt_delivered >= cc->congestion_recovery_next_round_delivered) { + cc->packet_conservation = 0; + } + + if (!in_congestion_recovery(cstat, ack->largest_acked_sent_ts)) { + cc->in_loss_recovery = 0; + cc->packet_conservation = 0; + bbr_restore_cwnd(cc, cstat); + } + + return; + } + + if (cc->congestion_recovery_start_ts != UINT64_MAX) { + cc->in_loss_recovery = 1; + bbr_save_cwnd(cc, cstat); + cstat->cwnd = + cstat->bytes_in_flight + + ngtcp2_max(ack->bytes_delivered, cstat->max_tx_udp_payload_size); + + cstat->congestion_recovery_start_ts = cc->congestion_recovery_start_ts; + cc->congestion_recovery_start_ts = UINT64_MAX; + cc->packet_conservation = 1; + cc->congestion_recovery_next_round_delivered = cc->rst->delivered; + } +} + +static void bbr_update_btl_bw(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack) { + bbr_update_round(cc, ack); + bbr_handle_recovery(cc, cstat, ack); + + if (cstat->delivery_rate_sec < cc->btl_bw && cc->rst->rs.is_app_limited) { + return; + } + + ngtcp2_window_filter_update(&cc->btl_bw_filter, cstat->delivery_rate_sec, + cc->round_count); + + cc->btl_bw = ngtcp2_window_filter_get_best(&cc->btl_bw_filter); +} + +static void bbr_update_rtprop(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts) { + cc->rtprop_expired = ts > cc->rtprop_stamp + NGTCP2_RTPROP_FILTERLEN; + + /* Need valid RTT sample */ + if (cstat->latest_rtt && + (cstat->latest_rtt <= cc->rt_prop || cc->rtprop_expired)) { + cc->rt_prop = cstat->latest_rtt; + cc->rtprop_stamp = ts; + + ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV, + "bbr update RTprop=%" PRIu64, cc->rt_prop); + } +} + +static void bbr_init_pacing_rate(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat) { + double nominal_bandwidth = + (double)cc->initial_cwnd / (double)NGTCP2_MILLISECONDS; + + cstat->pacing_rate = cc->pacing_gain * nominal_bandwidth; +} + +static void bbr_set_pacing_rate_with_gain(ngtcp2_bbr_cc *cc, + ngtcp2_conn_stat *cstat, + double pacing_gain) { + double rate = pacing_gain * (double)cc->btl_bw / NGTCP2_SECONDS; + + if (cc->filled_pipe || rate > cstat->pacing_rate) { + cstat->pacing_rate = rate; + } +} + +static void bbr_set_pacing_rate(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat) { + bbr_set_pacing_rate_with_gain(cc, cstat, cc->pacing_gain); +} + +static void bbr_set_send_quantum(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat) { + uint64_t send_quantum; + (void)cc; + + if (cstat->pacing_rate < 1.2 * 1024 * 1024 / 8 / NGTCP2_SECONDS) { + cstat->send_quantum = cstat->max_tx_udp_payload_size; + } else if (cstat->pacing_rate < 24.0 * 1024 * 1024 / 8 / NGTCP2_SECONDS) { + cstat->send_quantum = cstat->max_tx_udp_payload_size * 2; + } else { + send_quantum = + (uint64_t)(cstat->pacing_rate * (double)(cstat->min_rtt == UINT64_MAX + ? NGTCP2_MILLISECONDS + : cstat->min_rtt)); + cstat->send_quantum = (size_t)ngtcp2_min(send_quantum, 64 * 1024); + } + + cstat->send_quantum = + ngtcp2_max(cstat->send_quantum, cstat->max_tx_udp_payload_size * 10); +} + +static uint64_t bbr_inflight(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat, + double gain) { + uint64_t quanta = 3 * cstat->send_quantum; + double estimated_bdp; + + if (cc->rt_prop == UINT64_MAX) { + /* no valid RTT samples yet */ + return cc->initial_cwnd; + } + + estimated_bdp = (double)cc->btl_bw * (double)cc->rt_prop / NGTCP2_SECONDS; + + return (uint64_t)(gain * estimated_bdp) + quanta; +} + +static void bbr_update_target_cwnd(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat) { + cc->target_cwnd = bbr_inflight(cc, cstat, cc->cwnd_gain); +} + +static void bbr_modulate_cwnd_for_recovery(ngtcp2_bbr_cc *cc, + ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack) { + if (ack->bytes_lost > 0) { + if (cstat->cwnd > ack->bytes_lost) { + cstat->cwnd -= ack->bytes_lost; + cstat->cwnd = ngtcp2_max(cstat->cwnd, 2 * cstat->max_tx_udp_payload_size); + } else { + cstat->cwnd = 2 * cstat->max_tx_udp_payload_size; + } + } + + if (cc->packet_conservation) { + cstat->cwnd = + ngtcp2_max(cstat->cwnd, cstat->bytes_in_flight + ack->bytes_delivered); + } +} + +static void bbr_save_cwnd(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat) { + if (!cc->in_loss_recovery && cc->state != NGTCP2_BBR_STATE_PROBE_RTT) { + cc->prior_cwnd = cstat->cwnd; + return; + } + + cc->prior_cwnd = ngtcp2_max(cc->prior_cwnd, cstat->cwnd); +} + +static void bbr_restore_cwnd(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat) { + cstat->cwnd = ngtcp2_max(cstat->cwnd, cc->prior_cwnd); +} + +static uint64_t min_pipe_cwnd(size_t max_udp_payload_size) { + return max_udp_payload_size * 4; +} + +static void bbr_modulate_cwnd_for_probe_rtt(ngtcp2_bbr_cc *cc, + ngtcp2_conn_stat *cstat) { + if (cc->state == NGTCP2_BBR_STATE_PROBE_RTT) { + cstat->cwnd = + ngtcp2_min(cstat->cwnd, min_pipe_cwnd(cstat->max_tx_udp_payload_size)); + } +} + +static void bbr_set_cwnd(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack) { + bbr_update_target_cwnd(cc, cstat); + bbr_modulate_cwnd_for_recovery(cc, cstat, ack); + + if (!cc->packet_conservation) { + if (cc->filled_pipe) { + cstat->cwnd = + ngtcp2_min(cstat->cwnd + ack->bytes_delivered, cc->target_cwnd); + } else if (cstat->cwnd < cc->target_cwnd || + cc->rst->delivered < cc->initial_cwnd) { + cstat->cwnd += ack->bytes_delivered; + } + + cstat->cwnd = + ngtcp2_max(cstat->cwnd, min_pipe_cwnd(cstat->max_tx_udp_payload_size)); + } + + bbr_modulate_cwnd_for_probe_rtt(cc, cstat); +} + +static void bbr_init(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp initial_ts) { + cc->pacing_gain = NGTCP2_BBR_HIGH_GAIN; + cc->prior_cwnd = 0; + cc->target_cwnd = 0; + cc->btl_bw = 0; + cc->rt_prop = UINT64_MAX; + cc->rtprop_stamp = initial_ts; + cc->cycle_stamp = UINT64_MAX; + cc->probe_rtt_done_stamp = UINT64_MAX; + cc->cycle_index = 0; + cc->rtprop_expired = 0; + cc->idle_restart = 0; + cc->packet_conservation = 0; + cc->probe_rtt_round_done = 0; + + cc->congestion_recovery_start_ts = UINT64_MAX; + cc->congestion_recovery_next_round_delivered = 0; + cc->in_loss_recovery = 0; + + cstat->send_quantum = cstat->max_tx_udp_payload_size * 10; + + ngtcp2_window_filter_init(&cc->btl_bw_filter, NGTCP2_BBR_BTL_BW_FILTERLEN); + + bbr_init_round_counting(cc); + bbr_init_full_pipe(cc); + bbr_init_pacing_rate(cc, cstat); + bbr_enter_startup(cc); +} + +static void bbr_enter_startup(ngtcp2_bbr_cc *cc) { + cc->state = NGTCP2_BBR_STATE_STARTUP; + cc->pacing_gain = NGTCP2_BBR_HIGH_GAIN; + cc->cwnd_gain = NGTCP2_BBR_HIGH_GAIN; +} + +static void bbr_init_full_pipe(ngtcp2_bbr_cc *cc) { + cc->filled_pipe = 0; + cc->full_bw = 0; + cc->full_bw_count = 0; +} + +static void bbr_check_full_pipe(ngtcp2_bbr_cc *cc) { + if (cc->filled_pipe || !cc->round_start || cc->rst->rs.is_app_limited) { + /* no need to check for a full pipe now. */ + return; + } + + /* cc->btl_bw still growing? */ + if (cc->btl_bw * 100 >= cc->full_bw * 125) { + /* record new baseline level */ + cc->full_bw = cc->btl_bw; + cc->full_bw_count = 0; + return; + } + /* another round w/o much growth */ + ++cc->full_bw_count; + if (cc->full_bw_count >= 3) { + cc->filled_pipe = 1; + ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV, + "bbr filled pipe, btl_bw=%" PRIu64, cc->btl_bw); + } +} + +static void bbr_enter_drain(ngtcp2_bbr_cc *cc) { + cc->state = NGTCP2_BBR_STATE_DRAIN; + /* pace slowly */ + cc->pacing_gain = 1.0 / NGTCP2_BBR_HIGH_GAIN; + /* maintain cwnd */ + cc->cwnd_gain = NGTCP2_BBR_HIGH_GAIN; +} + +static void bbr_check_drain(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts) { + if (cc->state == NGTCP2_BBR_STATE_STARTUP && cc->filled_pipe) { + ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV, + "bbr exit Startup and enter Drain"); + + bbr_enter_drain(cc); + } + + if (cc->state == NGTCP2_BBR_STATE_DRAIN && + cstat->bytes_in_flight <= bbr_inflight(cc, cstat, 1.0)) { + ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV, + "bbr exit Drain and enter ProbeBW"); + + /* we estimate queue is drained */ + bbr_enter_probe_bw(cc, ts); + } +} + +static void bbr_enter_probe_bw(ngtcp2_bbr_cc *cc, ngtcp2_tstamp ts) { + uint8_t rand; + + cc->state = NGTCP2_BBR_STATE_PROBE_BW; + cc->pacing_gain = 1; + cc->cwnd_gain = 2; + + assert(cc->rand); + + cc->rand(&rand, 1, &cc->rand_ctx); + + cc->cycle_index = NGTCP2_BBR_GAIN_CYCLELEN - 1 - (size_t)(rand * 7 / 256); + bbr_advance_cycle_phase(cc, ts); +} + +static void bbr_check_cycle_phase(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts) { + if (cc->state == NGTCP2_BBR_STATE_PROBE_BW && + bbr_is_next_cycle_phase(cc, cstat, ack, ts)) { + bbr_advance_cycle_phase(cc, ts); + } +} + +static void bbr_advance_cycle_phase(ngtcp2_bbr_cc *cc, ngtcp2_tstamp ts) { + cc->cycle_stamp = ts; + cc->cycle_index = (cc->cycle_index + 1) & (NGTCP2_BBR_GAIN_CYCLELEN - 1); + cc->pacing_gain = pacing_gain_cycle[cc->cycle_index]; +} + +static int bbr_is_next_cycle_phase(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts) { + int is_full_length = (ts - cc->cycle_stamp) > cc->rt_prop; + + if (cc->pacing_gain > 1) { + return is_full_length && (ack->bytes_lost > 0 || + ack->prior_bytes_in_flight >= + bbr_inflight(cc, cstat, cc->pacing_gain)); + } + + if (cc->pacing_gain < 1) { + return is_full_length || + ack->prior_bytes_in_flight <= bbr_inflight(cc, cstat, 1); + } + + return is_full_length; +} + +static void bbr_handle_restart_from_idle(ngtcp2_bbr_cc *cc, + ngtcp2_conn_stat *cstat) { + if (cstat->bytes_in_flight == 0 && cc->rst->app_limited) { + ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV, "bbr restart from idle"); + + cc->idle_restart = 1; + + if (cc->state == NGTCP2_BBR_STATE_PROBE_BW) { + bbr_set_pacing_rate_with_gain(cc, cstat, 1); + } + } +} + +static void bbr_check_probe_rtt(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts) { + if (cc->state != NGTCP2_BBR_STATE_PROBE_RTT && cc->rtprop_expired && + !cc->idle_restart) { + ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV, "bbr enter ProbeRTT"); + + bbr_enter_probe_rtt(cc); + bbr_save_cwnd(cc, cstat); + cc->probe_rtt_done_stamp = UINT64_MAX; + } + + if (cc->state == NGTCP2_BBR_STATE_PROBE_RTT) { + bbr_handle_probe_rtt(cc, cstat, ts); + } + + cc->idle_restart = 0; +} + +static void bbr_enter_probe_rtt(ngtcp2_bbr_cc *cc) { + cc->state = NGTCP2_BBR_STATE_PROBE_RTT; + cc->pacing_gain = 1; + cc->cwnd_gain = 1; +} + +static void bbr_handle_probe_rtt(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts) { + uint64_t app_limited = cc->rst->delivered + cstat->bytes_in_flight; + + /* Ignore low rate samples during NGTCP2_BBR_STATE_PROBE_RTT. */ + cc->rst->app_limited = app_limited ? app_limited : 1; + + if (cc->probe_rtt_done_stamp == UINT64_MAX && + cstat->bytes_in_flight <= min_pipe_cwnd(cstat->max_tx_udp_payload_size)) { + cc->probe_rtt_done_stamp = ts + NGTCP2_BBR_PROBE_RTT_DURATION; + cc->probe_rtt_round_done = 0; + cc->next_round_delivered = cc->rst->delivered; + + return; + } + + if (cc->probe_rtt_done_stamp != UINT64_MAX) { + if (cc->round_start) { + cc->probe_rtt_round_done = 1; + } + + if (cc->probe_rtt_round_done && ts > cc->probe_rtt_done_stamp) { + cc->rtprop_stamp = ts; + bbr_restore_cwnd(cc, cstat); + bbr_exit_probe_rtt(cc, ts); + } + } +} + +static void bbr_exit_probe_rtt(ngtcp2_bbr_cc *cc, ngtcp2_tstamp ts) { + if (cc->filled_pipe) { + ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV, + "bbr exit ProbeRTT and enter ProbeBW"); + + bbr_enter_probe_bw(cc, ts); + + return; + } + + ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV, + "bbr exit ProbeRTT and enter Startup"); + + bbr_enter_startup(cc); +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr.h new file mode 100644 index 0000000..7311f05 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr.h @@ -0,0 +1,156 @@ +/* + * ngtcp2 + * + * Copyright (c) 2021 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_BBR_H +#define NGTCP2_BBR_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +#include "ngtcp2_cc.h" +#include "ngtcp2_window_filter.h" + +typedef struct ngtcp2_rst ngtcp2_rst; + +typedef enum ngtcp2_bbr_state { + NGTCP2_BBR_STATE_STARTUP, + NGTCP2_BBR_STATE_DRAIN, + NGTCP2_BBR_STATE_PROBE_BW, + NGTCP2_BBR_STATE_PROBE_RTT, +} ngtcp2_bbr_state; + +/* + * ngtcp2_bbr_cc is BBR congestion controller, described in + * https://tools.ietf.org/html/draft-cardwell-iccrg-bbr-congestion-control-00 + */ +typedef struct ngtcp2_bbr_cc { + ngtcp2_cc_base ccb; + + /* The max filter used to estimate BBR.BtlBw. */ + ngtcp2_window_filter btl_bw_filter; + uint64_t initial_cwnd; + ngtcp2_rst *rst; + ngtcp2_rand rand; + ngtcp2_rand_ctx rand_ctx; + + /* BBR variables */ + + /* The dynamic gain factor used to scale BBR.BtlBw to + produce BBR.pacing_rate. */ + double pacing_gain; + /* The dynamic gain factor used to scale the estimated BDP to produce a + congestion window (cwnd). */ + double cwnd_gain; + uint64_t full_bw; + /* packet.delivered value denoting the end of a packet-timed round trip. */ + uint64_t next_round_delivered; + /* Count of packet-timed round trips. */ + uint64_t round_count; + uint64_t prior_cwnd; + /* target_cwnd is the upper bound on the volume of data BBR + allows in flight. */ + uint64_t target_cwnd; + /* BBR's estimated bottleneck bandwidth available to the + transport flow, estimated from the maximum delivery rate sample in a + sliding window. */ + uint64_t btl_bw; + /* BBR's estimated two-way round-trip propagation delay of + the path, estimated from the windowed minimum recent round-trip delay + sample. */ + ngtcp2_duration rt_prop; + /* The wall clock time at which the current BBR.RTProp + sample was obtained. */ + ngtcp2_tstamp rtprop_stamp; + ngtcp2_tstamp cycle_stamp; + ngtcp2_tstamp probe_rtt_done_stamp; + /* congestion_recovery_start_ts is the time when congestion recovery + period started.*/ + ngtcp2_tstamp congestion_recovery_start_ts; + uint64_t congestion_recovery_next_round_delivered; + size_t full_bw_count; + size_t cycle_index; + ngtcp2_bbr_state state; + /* A boolean that records whether BBR estimates that it has ever fully + utilized its available bandwidth ("filled the pipe"). */ + int filled_pipe; + /* A boolean that BBR sets to true once per packet-timed round trip, + on ACKs that advance BBR.round_count. */ + int round_start; + int rtprop_expired; + int idle_restart; + int packet_conservation; + int probe_rtt_round_done; + /* in_loss_recovery becomes nonzero when BBR enters loss recovery + period. */ + int in_loss_recovery; +} ngtcp2_bbr_cc; + +int ngtcp2_cc_bbr_cc_init(ngtcp2_cc *cc, ngtcp2_log *log, + ngtcp2_conn_stat *cstat, ngtcp2_rst *rst, + ngtcp2_tstamp initial_ts, ngtcp2_rand rand, + const ngtcp2_rand_ctx *rand_ctx, + const ngtcp2_mem *mem); + +void ngtcp2_cc_bbr_cc_free(ngtcp2_cc *cc, const ngtcp2_mem *mem); + +void ngtcp2_bbr_cc_init(ngtcp2_bbr_cc *bbr_cc, ngtcp2_conn_stat *cstat, + ngtcp2_rst *rst, ngtcp2_tstamp initial_ts, + ngtcp2_rand rand, const ngtcp2_rand_ctx *rand_ctx, + ngtcp2_log *log); + +void ngtcp2_bbr_cc_free(ngtcp2_bbr_cc *cc); + +void ngtcp2_cc_bbr_cc_on_pkt_acked(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_pkt *pkt, ngtcp2_tstamp ts); + +void ngtcp2_cc_bbr_cc_congestion_event(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp sent_ts, ngtcp2_tstamp ts); + +void ngtcp2_cc_bbr_cc_on_spurious_congestion(ngtcp2_cc *ccx, + ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts); + +void ngtcp2_cc_bbr_cc_on_persistent_congestion(ngtcp2_cc *cc, + ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts); + +void ngtcp2_cc_bbr_cc_on_ack_recv(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts); + +void ngtcp2_cc_bbr_cc_on_pkt_sent(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_pkt *pkt); + +void ngtcp2_cc_bbr_cc_new_rtt_sample(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts); + +void ngtcp2_cc_bbr_cc_reset(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts); + +void ngtcp2_cc_bbr_cc_event(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, + ngtcp2_cc_event_type event, ngtcp2_tstamp ts); + +#endif /* NGTCP2_BBR_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr2.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr2.c new file mode 100644 index 0000000..508ab55 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr2.c @@ -0,0 +1,1489 @@ +/* + * ngtcp2 + * + * Copyright (c) 2021 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_bbr2.h" + +#include <assert.h> + +#include "ngtcp2_log.h" +#include "ngtcp2_macro.h" +#include "ngtcp2_mem.h" +#include "ngtcp2_rcvry.h" +#include "ngtcp2_rst.h" + +#define NGTCP2_BBR_MAX_BW_FILTERLEN 2 + +#define NGTCP2_BBR_EXTRA_ACKED_FILTERLEN 10 + +#define NGTCP2_BBR_STARTUP_PACING_GAIN ((double)2.77) + +#define NGTCP2_BBR_STARTUP_CWND_GAIN 2 + +#define NGTCP2_BBR_PROBE_RTT_CWND_GAIN ((double)0.5) + +#define NGTCP2_BBR_BETA_NUMER 7 +#define NGTCP2_BBR_BETA_DENOM 10 + +#define NGTCP2_BBR_LOSS_THRESH_NUMER 2 +#define NGTCP2_BBR_LOSS_THRESH_DENOM 100 + +#define NGTCP2_BBR_HEADROOM_NUMER 15 +#define NGTCP2_BBR_HEADROOM_DENOM 100 + +#define NGTCP2_BBR_PROBE_RTT_INTERVAL (5 * NGTCP2_SECONDS) +#define NGTCP2_BBR_MIN_RTT_FILTERLEN (10 * NGTCP2_SECONDS) + +#define NGTCP2_BBR_PROBE_RTT_DURATION (200 * NGTCP2_MILLISECONDS) + +#define NGTCP2_BBR_PACING_MARGIN_PERCENT 1 + +static void bbr_on_init(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp initial_ts); + +static void bbr_on_transmit(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts); + +static void bbr_reset_congestion_signals(ngtcp2_bbr2_cc *bbr); + +static void bbr_reset_lower_bounds(ngtcp2_bbr2_cc *bbr); + +static void bbr_init_round_counting(ngtcp2_bbr2_cc *bbr); + +static void bbr_init_full_pipe(ngtcp2_bbr2_cc *bbr); + +static void bbr_init_pacing_rate(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat); + +static void bbr_set_pacing_rate_with_gain(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat, + double pacing_gain); + +static void bbr_set_pacing_rate(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat); + +static void bbr_enter_startup(ngtcp2_bbr2_cc *bbr); + +static void bbr_check_startup_done(ngtcp2_bbr2_cc *bbr, + const ngtcp2_cc_ack *ack); + +static void bbr_update_on_ack(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts); + +static void bbr_update_model_and_state(ngtcp2_bbr2_cc *cc, + ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack, + ngtcp2_tstamp ts); + +static void bbr_update_control_parameters(ngtcp2_bbr2_cc *cc, + ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack); + +static void bbr_update_on_loss(ngtcp2_bbr2_cc *cc, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_pkt *pkt, ngtcp2_tstamp ts); + +static void bbr_update_latest_delivery_signals(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat); + +static void bbr_advance_latest_delivery_signals(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat); + +static void bbr_update_congestion_signals(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack); + +static void bbr_adapt_lower_bounds_from_congestion(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat); + +static void bbr_init_lower_bounds(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat); + +static void bbr_loss_lower_bounds(ngtcp2_bbr2_cc *bbr); + +static void bbr_bound_bw_for_model(ngtcp2_bbr2_cc *bbr); + +static void bbr_update_max_bw(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack); + +static void bbr_update_round(ngtcp2_bbr2_cc *bbr, const ngtcp2_cc_ack *ack); + +static void bbr_start_round(ngtcp2_bbr2_cc *bbr); + +static int bbr_is_in_probe_bw_state(ngtcp2_bbr2_cc *bbr); + +static void bbr_update_ack_aggregation(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack, + ngtcp2_tstamp ts); + +static void bbr_enter_drain(ngtcp2_bbr2_cc *bbr); + +static void bbr_check_drain(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts); + +static void bbr_enter_probe_bw(ngtcp2_bbr2_cc *bbr, ngtcp2_tstamp ts); + +static void bbr_start_probe_bw_down(ngtcp2_bbr2_cc *bbr, ngtcp2_tstamp ts); + +static void bbr_start_probe_bw_cruise(ngtcp2_bbr2_cc *bbr); + +static void bbr_start_probe_bw_refill(ngtcp2_bbr2_cc *bbr); + +static void bbr_start_probe_bw_up(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts); + +static void bbr_update_probe_bw_cycle_phase(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack, + ngtcp2_tstamp ts); + +static int bbr_check_time_to_cruise(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat, ngtcp2_tstamp ts); + +static int bbr_has_elapsed_in_phase(ngtcp2_bbr2_cc *bbr, + ngtcp2_duration interval, ngtcp2_tstamp ts); + +static uint64_t bbr_inflight_with_headroom(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat); + +static void bbr_raise_inflight_hi_slope(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat); + +static void bbr_probe_inflight_hi_upward(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack); + +static void bbr_adapt_upper_bounds(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts); + +static int bbr_check_time_to_probe_bw(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts); + +static void bbr_pick_probe_wait(ngtcp2_bbr2_cc *bbr); + +static int bbr_is_reno_coexistence_probe_time(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat); + +static uint64_t bbr_target_inflight(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat); + +static int bbr_check_inflight_too_high(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts); + +static int is_inflight_too_high(const ngtcp2_rs *rs); + +static void bbr_handle_inflight_too_high(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat, + const ngtcp2_rs *rs, ngtcp2_tstamp ts); + +static void bbr_handle_lost_packet(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_pkt *pkt, ngtcp2_tstamp ts); + +static uint64_t bbr_inflight_hi_from_lost_packet(ngtcp2_bbr2_cc *bbr, + const ngtcp2_rs *rs, + const ngtcp2_cc_pkt *pkt); + +static void bbr_update_min_rtt(ngtcp2_bbr2_cc *bbr, const ngtcp2_cc_ack *ack, + ngtcp2_tstamp ts); + +static void bbr_check_probe_rtt(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts); + +static void bbr_enter_probe_rtt(ngtcp2_bbr2_cc *bbr); + +static void bbr_handle_probe_rtt(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts); + +static void bbr_check_probe_rtt_done(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat, ngtcp2_tstamp ts); + +static void bbr_mark_connection_app_limited(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat); + +static void bbr_exit_probe_rtt(ngtcp2_bbr2_cc *bbr, ngtcp2_tstamp ts); + +static void bbr_handle_restart_from_idle(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts); + +static uint64_t bbr_bdp_multiple(ngtcp2_bbr2_cc *bbr, uint64_t bw, double gain); + +static uint64_t bbr_quantization_budget(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat, + uint64_t inflight); + +static uint64_t bbr_inflight(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat, + uint64_t bw, double gain); + +static void bbr_update_max_inflight(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat); + +static void bbr_update_offload_budget(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat); + +static uint64_t min_pipe_cwnd(size_t max_udp_payload_size); + +static void bbr_advance_max_bw_filter(ngtcp2_bbr2_cc *bbr); + +static void bbr_modulate_cwnd_for_recovery(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack); + +static void bbr_save_cwnd(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat); + +static void bbr_restore_cwnd(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat); + +static uint64_t bbr_probe_rtt_cwnd(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat); + +static void bbr_bound_cwnd_for_probe_rtt(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat); + +static void bbr_set_cwnd(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack); + +static void bbr_bound_cwnd_for_model(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat); + +static void bbr_set_send_quantum(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat); + +static int in_congestion_recovery(const ngtcp2_conn_stat *cstat, + ngtcp2_tstamp sent_time); + +static void bbr_handle_recovery(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack); + +static void bbr_on_init(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp initial_ts) { + ngtcp2_window_filter_init(&bbr->max_bw_filter, NGTCP2_BBR_MAX_BW_FILTERLEN); + ngtcp2_window_filter_init(&bbr->extra_acked_filter, + NGTCP2_BBR_EXTRA_ACKED_FILTERLEN); + + bbr->min_rtt = UINT64_MAX; + bbr->min_rtt_stamp = initial_ts; + /* remark: Use UINT64_MAX instead of 0 for consistency. */ + bbr->probe_rtt_done_stamp = UINT64_MAX; + bbr->probe_rtt_round_done = 0; + bbr->prior_cwnd = 0; + bbr->idle_restart = 0; + bbr->extra_acked_interval_start = initial_ts; + bbr->extra_acked_delivered = 0; + + bbr_reset_congestion_signals(bbr); + bbr_reset_lower_bounds(bbr); + bbr_init_round_counting(bbr); + bbr_init_full_pipe(bbr); + bbr_init_pacing_rate(bbr, cstat); + bbr_enter_startup(bbr); + + cstat->send_quantum = cstat->max_tx_udp_payload_size * 10; + + /* Missing in documentation */ + bbr->loss_round_start = 0; + bbr->loss_round_delivered = UINT64_MAX; + + bbr->rounds_since_bw_probe = 0; + + bbr->max_bw = 0; + bbr->bw = 0; + + bbr->cycle_count = 0; + + bbr->extra_acked = 0; + + bbr->bytes_lost_in_round = 0; + bbr->loss_events_in_round = 0; + + bbr->offload_budget = 0; + + bbr->probe_up_cnt = UINT64_MAX; + bbr->cycle_stamp = UINT64_MAX; + bbr->ack_phase = 0; + bbr->bw_probe_wait = 0; + bbr->bw_probe_samples = 0; + bbr->bw_probe_up_rounds = 0; + bbr->bw_probe_up_acks = 0; + + bbr->inflight_hi = UINT64_MAX; + bbr->bw_hi = UINT64_MAX; + + bbr->probe_rtt_expired = 0; + bbr->probe_rtt_min_delay = UINT64_MAX; + bbr->probe_rtt_min_stamp = initial_ts; + + bbr->in_loss_recovery = 0; + bbr->packet_conservation = 0; + + bbr->max_inflight = 0; + + bbr->congestion_recovery_start_ts = UINT64_MAX; + bbr->congestion_recovery_next_round_delivered = 0; + + bbr->prior_inflight_lo = 0; + bbr->prior_inflight_hi = 0; + bbr->prior_bw_lo = 0; +} + +static void bbr_reset_congestion_signals(ngtcp2_bbr2_cc *bbr) { + bbr->loss_in_round = 0; + bbr->bw_latest = 0; + bbr->inflight_latest = 0; +} + +static void bbr_reset_lower_bounds(ngtcp2_bbr2_cc *bbr) { + bbr->bw_lo = UINT64_MAX; + bbr->inflight_lo = UINT64_MAX; +} + +static void bbr_init_round_counting(ngtcp2_bbr2_cc *bbr) { + bbr->next_round_delivered = 0; + bbr->round_start = 0; + bbr->round_count = 0; +} + +static void bbr_init_full_pipe(ngtcp2_bbr2_cc *bbr) { + bbr->filled_pipe = 0; + bbr->full_bw = 0; + bbr->full_bw_count = 0; +} + +static void bbr_check_startup_full_bandwidth(ngtcp2_bbr2_cc *bbr) { + if (bbr->filled_pipe || !bbr->round_start || bbr->rst->rs.is_app_limited) { + return; + } + + if (bbr->max_bw * 100 >= bbr->full_bw * 125) { + bbr->full_bw = bbr->max_bw; + bbr->full_bw_count = 0; + } + + ++bbr->full_bw_count; + + if (bbr->full_bw_count >= 3) { + bbr->filled_pipe = 1; + + ngtcp2_log_info(bbr->ccb.log, NGTCP2_LOG_EVENT_RCV, + "bbr2 filled pipe, full_bw=%" PRIu64, bbr->full_bw); + } +} + +static void bbr_check_startup_high_loss(ngtcp2_bbr2_cc *bbr, + const ngtcp2_cc_ack *ack) { + if (bbr->filled_pipe || !bbr->round_start || bbr->rst->rs.is_app_limited) { + return; + } + + if (bbr->loss_events_in_round <= 3) { + return; + } + + /* loss_thresh = 2% */ + if (bbr->bytes_lost_in_round * 100 <= ack->prior_bytes_in_flight * 2) { + return; + } + + bbr->filled_pipe = 1; +} + +static void bbr_init_pacing_rate(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat) { + double nominal_bandwidth = (double)bbr->initial_cwnd; + + cstat->pacing_rate = NGTCP2_BBR_STARTUP_PACING_GAIN * nominal_bandwidth / + (double)NGTCP2_MILLISECONDS; +} + +static void bbr_set_pacing_rate_with_gain(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat, + double pacing_gain) { + double rate = pacing_gain * (double)bbr->bw * + (100 - NGTCP2_BBR_PACING_MARGIN_PERCENT) / 100 / NGTCP2_SECONDS; + + if (bbr->filled_pipe || rate > cstat->pacing_rate) { + cstat->pacing_rate = rate; + } +} + +static void bbr_set_pacing_rate(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat) { + bbr_set_pacing_rate_with_gain(bbr, cstat, bbr->pacing_gain); +} + +static void bbr_enter_startup(ngtcp2_bbr2_cc *bbr) { + ngtcp2_log_info(bbr->ccb.log, NGTCP2_LOG_EVENT_RCV, "bbr2 enter Startup"); + + bbr->state = NGTCP2_BBR2_STATE_STARTUP; + bbr->pacing_gain = NGTCP2_BBR_STARTUP_PACING_GAIN; + bbr->cwnd_gain = NGTCP2_BBR_STARTUP_CWND_GAIN; +} + +static void bbr_check_startup_done(ngtcp2_bbr2_cc *bbr, + const ngtcp2_cc_ack *ack) { + bbr_check_startup_full_bandwidth(bbr); + bbr_check_startup_high_loss(bbr, ack); + + if (bbr->state == NGTCP2_BBR2_STATE_STARTUP && bbr->filled_pipe) { + bbr_enter_drain(bbr); + } +} + +static void bbr_on_transmit(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts) { + bbr_handle_restart_from_idle(bbr, cstat, ts); +} + +static void bbr_update_on_ack(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts) { + bbr_update_model_and_state(bbr, cstat, ack, ts); + bbr_update_control_parameters(bbr, cstat, ack); +} + +static void bbr_update_model_and_state(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack, + ngtcp2_tstamp ts) { + bbr_update_latest_delivery_signals(bbr, cstat); + bbr_update_congestion_signals(bbr, cstat, ack); + bbr_update_ack_aggregation(bbr, cstat, ack, ts); + bbr_check_startup_done(bbr, ack); + bbr_check_drain(bbr, cstat, ts); + bbr_update_probe_bw_cycle_phase(bbr, cstat, ack, ts); + bbr_update_min_rtt(bbr, ack, ts); + bbr_check_probe_rtt(bbr, cstat, ts); + bbr_advance_latest_delivery_signals(bbr, cstat); + bbr_bound_bw_for_model(bbr); +} + +static void bbr_update_control_parameters(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack) { + bbr_set_pacing_rate(bbr, cstat); + bbr_set_send_quantum(bbr, cstat); + bbr_set_cwnd(bbr, cstat, ack); +} + +static void bbr_update_on_loss(ngtcp2_bbr2_cc *cc, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_pkt *pkt, ngtcp2_tstamp ts) { + bbr_handle_lost_packet(cc, cstat, pkt, ts); +} + +static void bbr_update_latest_delivery_signals(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat) { + bbr->loss_round_start = 0; + bbr->bw_latest = ngtcp2_max(bbr->bw_latest, cstat->delivery_rate_sec); + bbr->inflight_latest = + ngtcp2_max(bbr->inflight_latest, bbr->rst->rs.delivered); + + if (bbr->rst->rs.prior_delivered >= bbr->loss_round_delivered) { + bbr->loss_round_delivered = bbr->rst->delivered; + bbr->loss_round_start = 1; + } +} + +static void bbr_advance_latest_delivery_signals(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat) { + if (bbr->loss_round_start) { + bbr->bw_latest = cstat->delivery_rate_sec; + bbr->inflight_latest = bbr->rst->rs.delivered; + } +} + +static void bbr_update_congestion_signals(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack) { + bbr_update_max_bw(bbr, cstat, ack); + + if (ack->bytes_lost) { + bbr->bytes_lost_in_round += ack->bytes_lost; + ++bbr->loss_events_in_round; + + if (!bbr->loss_in_round) { + bbr->loss_in_round = 1; + bbr->loss_round_delivered = bbr->rst->delivered; + } + } + + if (!bbr->loss_round_start) { + return; + } + + bbr_adapt_lower_bounds_from_congestion(bbr, cstat); + + bbr->loss_in_round = 0; +} + +static void bbr_adapt_lower_bounds_from_congestion(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat) { + if (!bbr->filled_pipe || bbr_is_in_probe_bw_state(bbr)) { + return; + } + + if (bbr->loss_in_round) { + bbr_init_lower_bounds(bbr, cstat); + bbr_loss_lower_bounds(bbr); + } +} + +static void bbr_init_lower_bounds(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat) { + if (bbr->bw_lo == UINT64_MAX) { + bbr->bw_lo = bbr->max_bw; + } + + if (bbr->inflight_lo == UINT64_MAX) { + bbr->inflight_lo = cstat->cwnd; + } +} + +static void bbr_loss_lower_bounds(ngtcp2_bbr2_cc *bbr) { + bbr->bw_lo = ngtcp2_max(bbr->bw_latest, bbr->bw_lo * NGTCP2_BBR_BETA_NUMER / + NGTCP2_BBR_BETA_DENOM); + bbr->inflight_lo = ngtcp2_max(bbr->inflight_latest, + bbr->inflight_lo * NGTCP2_BBR_BETA_NUMER / + NGTCP2_BBR_BETA_DENOM); +} + +static void bbr_bound_bw_for_model(ngtcp2_bbr2_cc *bbr) { + bbr->bw = ngtcp2_min(bbr->max_bw, bbr->bw_lo); + bbr->bw = ngtcp2_min(bbr->bw, bbr->bw_hi); +} + +static void bbr_update_max_bw(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack) { + bbr_update_round(bbr, ack); + bbr_handle_recovery(bbr, cstat, ack); + + if (cstat->delivery_rate_sec >= bbr->max_bw || !bbr->rst->rs.is_app_limited) { + ngtcp2_window_filter_update(&bbr->max_bw_filter, cstat->delivery_rate_sec, + bbr->cycle_count); + + bbr->max_bw = ngtcp2_window_filter_get_best(&bbr->max_bw_filter); + } +} + +static void bbr_update_round(ngtcp2_bbr2_cc *bbr, const ngtcp2_cc_ack *ack) { + if (ack->pkt_delivered >= bbr->next_round_delivered) { + bbr_start_round(bbr); + + ++bbr->round_count; + ++bbr->rounds_since_bw_probe; + bbr->round_start = 1; + + bbr->bytes_lost_in_round = 0; + bbr->loss_events_in_round = 0; + + bbr->rst->is_cwnd_limited = 0; + + return; + } + + bbr->round_start = 0; +} + +static void bbr_start_round(ngtcp2_bbr2_cc *bbr) { + bbr->next_round_delivered = bbr->rst->delivered; +} + +static int bbr_is_in_probe_bw_state(ngtcp2_bbr2_cc *bbr) { + switch (bbr->state) { + case NGTCP2_BBR2_STATE_PROBE_BW_DOWN: + case NGTCP2_BBR2_STATE_PROBE_BW_CRUISE: + case NGTCP2_BBR2_STATE_PROBE_BW_REFILL: + case NGTCP2_BBR2_STATE_PROBE_BW_UP: + return 1; + default: + return 0; + } +} + +static void bbr_update_ack_aggregation(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack, + ngtcp2_tstamp ts) { + ngtcp2_duration interval = ts - bbr->extra_acked_interval_start; + uint64_t expected_delivered = bbr->bw * interval / NGTCP2_SECONDS; + uint64_t extra; + + if (bbr->extra_acked_delivered <= expected_delivered) { + bbr->extra_acked_delivered = 0; + bbr->extra_acked_interval_start = ts; + expected_delivered = 0; + } + + bbr->extra_acked_delivered += ack->bytes_delivered; + extra = bbr->extra_acked_delivered - expected_delivered; + extra = ngtcp2_min(extra, cstat->cwnd); + + ngtcp2_window_filter_update(&bbr->extra_acked_filter, extra, + bbr->round_count); + + bbr->extra_acked = ngtcp2_window_filter_get_best(&bbr->extra_acked_filter); +} + +static void bbr_enter_drain(ngtcp2_bbr2_cc *bbr) { + ngtcp2_log_info(bbr->ccb.log, NGTCP2_LOG_EVENT_RCV, "bbr2 enter Drain"); + + bbr->state = NGTCP2_BBR2_STATE_DRAIN; + bbr->pacing_gain = 1. / NGTCP2_BBR_STARTUP_CWND_GAIN; + bbr->cwnd_gain = NGTCP2_BBR_STARTUP_CWND_GAIN; +} + +static void bbr_check_drain(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts) { + if (bbr->state == NGTCP2_BBR2_STATE_DRAIN && + cstat->bytes_in_flight <= bbr_inflight(bbr, cstat, bbr->bw, 1.0)) { + bbr_enter_probe_bw(bbr, ts); + } +} + +static void bbr_enter_probe_bw(ngtcp2_bbr2_cc *bbr, ngtcp2_tstamp ts) { + bbr_start_probe_bw_down(bbr, ts); +} + +static void bbr_start_probe_bw_down(ngtcp2_bbr2_cc *bbr, ngtcp2_tstamp ts) { + ngtcp2_log_info(bbr->ccb.log, NGTCP2_LOG_EVENT_RCV, + "bbr2 start ProbeBW_DOWN"); + + bbr_reset_congestion_signals(bbr); + + bbr->probe_up_cnt = UINT64_MAX; + + bbr_pick_probe_wait(bbr); + + bbr->cycle_stamp = ts; + bbr->ack_phase = NGTCP2_BBR2_ACK_PHASE_ACKS_PROBE_STOPPING; + + bbr_start_round(bbr); + + bbr->state = NGTCP2_BBR2_STATE_PROBE_BW_DOWN; + bbr->pacing_gain = 0.9; + bbr->cwnd_gain = 2; +} + +static void bbr_start_probe_bw_cruise(ngtcp2_bbr2_cc *bbr) { + ngtcp2_log_info(bbr->ccb.log, NGTCP2_LOG_EVENT_RCV, + "bbr2 start ProbeBW_CRUISE"); + + bbr->state = NGTCP2_BBR2_STATE_PROBE_BW_CRUISE; + bbr->pacing_gain = 1.0; + bbr->cwnd_gain = 2; +} + +static void bbr_start_probe_bw_refill(ngtcp2_bbr2_cc *bbr) { + ngtcp2_log_info(bbr->ccb.log, NGTCP2_LOG_EVENT_RCV, + "bbr2 start ProbeBW_REFILL"); + + bbr_reset_lower_bounds(bbr); + + bbr->bw_probe_up_rounds = 0; + bbr->bw_probe_up_acks = 0; + bbr->ack_phase = NGTCP2_BBR2_ACK_PHASE_ACKS_REFILLING; + + bbr_start_round(bbr); + + bbr->state = NGTCP2_BBR2_STATE_PROBE_BW_REFILL; + bbr->pacing_gain = 1.0; + bbr->cwnd_gain = 2; +} + +static void bbr_start_probe_bw_up(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts) { + ngtcp2_log_info(bbr->ccb.log, NGTCP2_LOG_EVENT_RCV, "bbr2 start ProbeBW_UP"); + + bbr->ack_phase = NGTCP2_BBR2_ACK_PHASE_ACKS_PROBE_STARTING; + + bbr_start_round(bbr); + + bbr->cycle_stamp = ts; + bbr->state = NGTCP2_BBR2_STATE_PROBE_BW_UP; + bbr->pacing_gain = 1.25; + bbr->cwnd_gain = 2; + + bbr_raise_inflight_hi_slope(bbr, cstat); +} + +static void bbr_update_probe_bw_cycle_phase(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack, + ngtcp2_tstamp ts) { + if (!bbr->filled_pipe) { + return; + } + + bbr_adapt_upper_bounds(bbr, cstat, ack, ts); + + if (!bbr_is_in_probe_bw_state(bbr)) { + return; + } + + switch (bbr->state) { + case NGTCP2_BBR2_STATE_PROBE_BW_DOWN: + if (bbr_check_time_to_probe_bw(bbr, cstat, ts)) { + return; + } + + if (bbr_check_time_to_cruise(bbr, cstat, ts)) { + bbr_start_probe_bw_cruise(bbr); + } + + break; + case NGTCP2_BBR2_STATE_PROBE_BW_CRUISE: + if (bbr_check_time_to_probe_bw(bbr, cstat, ts)) { + return; + } + + break; + case NGTCP2_BBR2_STATE_PROBE_BW_REFILL: + if (bbr->round_start) { + bbr->bw_probe_samples = 1; + bbr_start_probe_bw_up(bbr, cstat, ts); + } + + break; + case NGTCP2_BBR2_STATE_PROBE_BW_UP: + if (bbr_has_elapsed_in_phase(bbr, bbr->min_rtt, ts) && + cstat->bytes_in_flight > bbr_inflight(bbr, cstat, bbr->max_bw, 1.25)) { + bbr_start_probe_bw_down(bbr, ts); + } + + break; + default: + break; + } +} + +static int bbr_check_time_to_cruise(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat, ngtcp2_tstamp ts) { + (void)ts; + + if (cstat->bytes_in_flight > bbr_inflight_with_headroom(bbr, cstat)) { + return 0; + } + + if (cstat->bytes_in_flight <= bbr_inflight(bbr, cstat, bbr->max_bw, 1.0)) { + return 1; + } + + return 0; +} + +static int bbr_has_elapsed_in_phase(ngtcp2_bbr2_cc *bbr, + ngtcp2_duration interval, + ngtcp2_tstamp ts) { + return ts > bbr->cycle_stamp + interval; +} + +static uint64_t bbr_inflight_with_headroom(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat) { + uint64_t headroom; + uint64_t mpcwnd; + if (bbr->inflight_hi == UINT64_MAX) { + return UINT64_MAX; + } + + headroom = ngtcp2_max(cstat->max_tx_udp_payload_size, + bbr->inflight_hi * NGTCP2_BBR_HEADROOM_NUMER / + NGTCP2_BBR_HEADROOM_DENOM); + mpcwnd = min_pipe_cwnd(cstat->max_tx_udp_payload_size); + + if (bbr->inflight_hi > headroom) { + return ngtcp2_max(bbr->inflight_hi - headroom, mpcwnd); + } + + return mpcwnd; +} + +static void bbr_raise_inflight_hi_slope(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat) { + uint64_t growth_this_round = cstat->max_tx_udp_payload_size + << bbr->bw_probe_up_rounds; + + bbr->bw_probe_up_rounds = ngtcp2_min(bbr->bw_probe_up_rounds + 1, 30); + bbr->probe_up_cnt = ngtcp2_max(cstat->cwnd / growth_this_round, 1) * + cstat->max_tx_udp_payload_size; +} + +static void bbr_probe_inflight_hi_upward(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack) { + uint64_t delta; + + if (!bbr->rst->is_cwnd_limited || cstat->cwnd < bbr->inflight_hi) { + return; + } + + bbr->bw_probe_up_acks += ack->bytes_delivered; + + if (bbr->bw_probe_up_acks >= bbr->probe_up_cnt) { + delta = bbr->bw_probe_up_acks / bbr->probe_up_cnt; + bbr->bw_probe_up_acks -= delta * bbr->probe_up_cnt; + bbr->inflight_hi += delta * cstat->max_tx_udp_payload_size; + } + + if (bbr->round_start) { + bbr_raise_inflight_hi_slope(bbr, cstat); + } +} + +static void bbr_adapt_upper_bounds(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts) { + if (bbr->ack_phase == NGTCP2_BBR2_ACK_PHASE_ACKS_PROBE_STARTING && + bbr->round_start) { + bbr->ack_phase = NGTCP2_BBR2_ACK_PHASE_ACKS_PROBE_FEEDBACK; + } + + if (bbr->ack_phase == NGTCP2_BBR2_ACK_PHASE_ACKS_PROBE_STOPPING && + bbr->round_start) { + if (bbr_is_in_probe_bw_state(bbr) && !bbr->rst->rs.is_app_limited) { + bbr_advance_max_bw_filter(bbr); + } + } + + if (!bbr_check_inflight_too_high(bbr, cstat, ts)) { + /* bbr->bw_hi never be updated */ + if (bbr->inflight_hi == UINT64_MAX /* || bbr->bw_hi == UINT64_MAX */) { + return; + } + + if (bbr->rst->rs.tx_in_flight > bbr->inflight_hi) { + bbr->inflight_hi = bbr->rst->rs.tx_in_flight; + } + + if (cstat->delivery_rate_sec > bbr->bw_hi) { + bbr->bw_hi = cstat->delivery_rate_sec; + } + + if (bbr->state == NGTCP2_BBR2_STATE_PROBE_BW_UP) { + bbr_probe_inflight_hi_upward(bbr, cstat, ack); + } + } +} + +static int bbr_check_time_to_probe_bw(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts) { + if (bbr_has_elapsed_in_phase(bbr, bbr->bw_probe_wait, ts) || + bbr_is_reno_coexistence_probe_time(bbr, cstat)) { + bbr_start_probe_bw_refill(bbr); + + return 1; + } + + return 0; +} + +static void bbr_pick_probe_wait(ngtcp2_bbr2_cc *bbr) { + uint8_t rand; + + bbr->rand(&rand, 1, &bbr->rand_ctx); + + bbr->rounds_since_bw_probe = (uint64_t)(rand * 2 / 256); + + bbr->rand(&rand, 1, &bbr->rand_ctx); + + bbr->bw_probe_wait = 2 * NGTCP2_SECONDS + + (ngtcp2_tstamp)((double)rand / 255. * NGTCP2_SECONDS); +} + +static int bbr_is_reno_coexistence_probe_time(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat) { + uint64_t reno_rounds = + bbr_target_inflight(bbr, cstat) / cstat->max_tx_udp_payload_size; + + return bbr->rounds_since_bw_probe >= ngtcp2_min(reno_rounds, 63); +} + +static uint64_t bbr_target_inflight(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat) { + uint64_t bdp = bbr_inflight(bbr, cstat, bbr->bw, 1.0); + + return ngtcp2_min(bdp, cstat->cwnd); +} + +static int bbr_check_inflight_too_high(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts) { + if (is_inflight_too_high(&bbr->rst->rs)) { + if (bbr->bw_probe_samples) { + bbr_handle_inflight_too_high(bbr, cstat, &bbr->rst->rs, ts); + } + + return 1; + } + + return 0; +} + +static int is_inflight_too_high(const ngtcp2_rs *rs) { + return rs->lost * NGTCP2_BBR_LOSS_THRESH_DENOM > + rs->tx_in_flight * NGTCP2_BBR_LOSS_THRESH_NUMER; +} + +static void bbr_handle_inflight_too_high(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat, + const ngtcp2_rs *rs, + ngtcp2_tstamp ts) { + bbr->bw_probe_samples = 0; + + if (!rs->is_app_limited) { + bbr->prior_inflight_hi = bbr->inflight_hi; + + bbr->inflight_hi = ngtcp2_max( + rs->tx_in_flight, bbr_target_inflight(bbr, cstat) * + NGTCP2_BBR_BETA_NUMER / NGTCP2_BBR_BETA_DENOM); + } + + if (bbr->state == NGTCP2_BBR2_STATE_PROBE_BW_UP) { + bbr_start_probe_bw_down(bbr, ts); + } +} + +static void bbr_handle_lost_packet(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_pkt *pkt, ngtcp2_tstamp ts) { + ngtcp2_rs rs = {0}; + + if (!bbr->bw_probe_samples) { + return; + } + + rs.tx_in_flight = pkt->tx_in_flight; + /* bbr->rst->lost is not incremented for pkt yet */ + rs.lost = bbr->rst->lost + pkt->pktlen - pkt->lost; + rs.is_app_limited = pkt->is_app_limited; + + if (is_inflight_too_high(&rs)) { + rs.tx_in_flight = bbr_inflight_hi_from_lost_packet(bbr, &rs, pkt); + + bbr_handle_inflight_too_high(bbr, cstat, &rs, ts); + } +} + +static uint64_t bbr_inflight_hi_from_lost_packet(ngtcp2_bbr2_cc *bbr, + const ngtcp2_rs *rs, + const ngtcp2_cc_pkt *pkt) { + uint64_t inflight_prev, lost_prev, lost_prefix; + (void)bbr; + + assert(rs->tx_in_flight >= pkt->pktlen); + + inflight_prev = rs->tx_in_flight - pkt->pktlen; + + assert(rs->lost >= pkt->pktlen); + + lost_prev = rs->lost - pkt->pktlen; + + if (inflight_prev * NGTCP2_BBR_LOSS_THRESH_NUMER < + lost_prev * NGTCP2_BBR_LOSS_THRESH_DENOM) { + return inflight_prev; + } + + lost_prefix = (inflight_prev * NGTCP2_BBR_LOSS_THRESH_NUMER - + lost_prev * NGTCP2_BBR_LOSS_THRESH_DENOM) / + (NGTCP2_BBR_LOSS_THRESH_DENOM - NGTCP2_BBR_LOSS_THRESH_NUMER); + + return inflight_prev + lost_prefix; +} + +static void bbr_update_min_rtt(ngtcp2_bbr2_cc *bbr, const ngtcp2_cc_ack *ack, + ngtcp2_tstamp ts) { + int min_rtt_expired; + + bbr->probe_rtt_expired = + ts > bbr->probe_rtt_min_stamp + NGTCP2_BBR_PROBE_RTT_INTERVAL; + + if (ack->rtt != UINT64_MAX && + (ack->rtt < bbr->probe_rtt_min_delay || bbr->probe_rtt_expired)) { + bbr->probe_rtt_min_delay = ack->rtt; + bbr->probe_rtt_min_stamp = ts; + } + + min_rtt_expired = ts > bbr->min_rtt_stamp + NGTCP2_BBR_MIN_RTT_FILTERLEN; + + if (bbr->probe_rtt_min_delay < bbr->min_rtt || min_rtt_expired) { + bbr->min_rtt = bbr->probe_rtt_min_delay; + bbr->min_rtt_stamp = bbr->probe_rtt_min_stamp; + + ngtcp2_log_info(bbr->ccb.log, NGTCP2_LOG_EVENT_RCV, + "bbr2 update min_rtt=%" PRIu64, bbr->min_rtt); + } +} + +static void bbr_check_probe_rtt(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts) { + if (bbr->state != NGTCP2_BBR2_STATE_PROBE_RTT && bbr->probe_rtt_expired && + !bbr->idle_restart) { + bbr_enter_probe_rtt(bbr); + bbr_save_cwnd(bbr, cstat); + + bbr->probe_rtt_done_stamp = UINT64_MAX; + bbr->ack_phase = NGTCP2_BBR2_ACK_PHASE_ACKS_PROBE_STOPPING; + + bbr_start_round(bbr); + } + + if (bbr->state == NGTCP2_BBR2_STATE_PROBE_RTT) { + bbr_handle_probe_rtt(bbr, cstat, ts); + } + + if (bbr->rst->rs.delivered) { + bbr->idle_restart = 0; + } +} + +static void bbr_enter_probe_rtt(ngtcp2_bbr2_cc *bbr) { + ngtcp2_log_info(bbr->ccb.log, NGTCP2_LOG_EVENT_RCV, "bbr2 enter ProbeRTT"); + + bbr->state = NGTCP2_BBR2_STATE_PROBE_RTT; + bbr->pacing_gain = 1; + bbr->cwnd_gain = NGTCP2_BBR_PROBE_RTT_CWND_GAIN; +} + +static void bbr_handle_probe_rtt(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts) { + bbr_mark_connection_app_limited(bbr, cstat); + + if (bbr->probe_rtt_done_stamp == UINT64_MAX && + cstat->bytes_in_flight <= bbr_probe_rtt_cwnd(bbr, cstat)) { + bbr->probe_rtt_done_stamp = ts + NGTCP2_BBR_PROBE_RTT_DURATION; + bbr->probe_rtt_round_done = 0; + + bbr_start_round(bbr); + + return; + } + + if (bbr->probe_rtt_done_stamp != UINT64_MAX) { + if (bbr->round_start) { + bbr->probe_rtt_round_done = 1; + } + + if (bbr->probe_rtt_round_done) { + bbr_check_probe_rtt_done(bbr, cstat, ts); + } + } +} + +static void bbr_check_probe_rtt_done(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts) { + if (bbr->probe_rtt_done_stamp != UINT64_MAX && + ts > bbr->probe_rtt_done_stamp) { + bbr->probe_rtt_min_stamp = ts; + bbr_restore_cwnd(bbr, cstat); + bbr_exit_probe_rtt(bbr, ts); + } +} + +static void bbr_mark_connection_app_limited(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat) { + uint64_t app_limited = bbr->rst->delivered + cstat->bytes_in_flight; + + if (app_limited) { + bbr->rst->app_limited = app_limited; + } else { + bbr->rst->app_limited = cstat->max_tx_udp_payload_size; + } +} + +static void bbr_exit_probe_rtt(ngtcp2_bbr2_cc *bbr, ngtcp2_tstamp ts) { + bbr_reset_lower_bounds(bbr); + + if (bbr->filled_pipe) { + bbr_start_probe_bw_down(bbr, ts); + bbr_start_probe_bw_cruise(bbr); + } else { + bbr_enter_startup(bbr); + } +} + +static void bbr_handle_restart_from_idle(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts) { + if (cstat->bytes_in_flight == 0 && bbr->rst->app_limited) { + ngtcp2_log_info(bbr->ccb.log, NGTCP2_LOG_EVENT_RCV, + "bbr2 restart from idle"); + + bbr->idle_restart = 1; + bbr->extra_acked_interval_start = ts; + + if (bbr_is_in_probe_bw_state(bbr)) { + bbr_set_pacing_rate_with_gain(bbr, cstat, 1); + } else if (bbr->state == NGTCP2_BBR2_STATE_PROBE_RTT) { + bbr_check_probe_rtt_done(bbr, cstat, ts); + } + } +} + +static uint64_t bbr_bdp_multiple(ngtcp2_bbr2_cc *bbr, uint64_t bw, + double gain) { + uint64_t bdp; + + if (bbr->min_rtt == UINT64_MAX) { + return bbr->initial_cwnd; + } + + bdp = bw * bbr->min_rtt / NGTCP2_SECONDS; + + return (uint64_t)(gain * (double)bdp); +} + +static uint64_t min_pipe_cwnd(size_t max_udp_payload_size) { + return max_udp_payload_size * 4; +} + +static uint64_t bbr_quantization_budget(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat, + uint64_t inflight) { + bbr_update_offload_budget(bbr, cstat); + + inflight = ngtcp2_max(inflight, bbr->offload_budget); + inflight = + ngtcp2_max(inflight, min_pipe_cwnd(cstat->max_tx_udp_payload_size)); + + if (bbr->state == NGTCP2_BBR2_STATE_PROBE_BW_UP) { + inflight += 2 * cstat->max_tx_udp_payload_size; + } + + return inflight; +} + +static uint64_t bbr_inflight(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat, + uint64_t bw, double gain) { + uint64_t inflight = bbr_bdp_multiple(bbr, bw, gain); + + return bbr_quantization_budget(bbr, cstat, inflight); +} + +static void bbr_update_max_inflight(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat) { + uint64_t inflight; + + /* Not documented */ + /* bbr_update_aggregation_budget(bbr); */ + + inflight = bbr_bdp_multiple(bbr, bbr->bw, bbr->cwnd_gain) + bbr->extra_acked; + bbr->max_inflight = bbr_quantization_budget(bbr, cstat, inflight); +} + +static void bbr_update_offload_budget(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat) { + bbr->offload_budget = 3 * cstat->send_quantum; +} + +static void bbr_advance_max_bw_filter(ngtcp2_bbr2_cc *bbr) { + ++bbr->cycle_count; +} + +static void bbr_modulate_cwnd_for_recovery(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack) { + if (ack->bytes_lost > 0) { + if (cstat->cwnd > ack->bytes_lost) { + cstat->cwnd -= ack->bytes_lost; + cstat->cwnd = ngtcp2_max(cstat->cwnd, 2 * cstat->max_tx_udp_payload_size); + } else { + cstat->cwnd = 2 * cstat->max_tx_udp_payload_size; + } + } + + if (bbr->packet_conservation) { + cstat->cwnd = + ngtcp2_max(cstat->cwnd, cstat->bytes_in_flight + ack->bytes_delivered); + } +} + +static void bbr_save_cwnd(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat) { + if (!bbr->in_loss_recovery && bbr->state != NGTCP2_BBR2_STATE_PROBE_RTT) { + bbr->prior_cwnd = cstat->cwnd; + return; + } + + bbr->prior_cwnd = ngtcp2_max(bbr->prior_cwnd, cstat->cwnd); +} + +static void bbr_restore_cwnd(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat) { + cstat->cwnd = ngtcp2_max(cstat->cwnd, bbr->prior_cwnd); +} + +static uint64_t bbr_probe_rtt_cwnd(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat) { + uint64_t probe_rtt_cwnd = + bbr_bdp_multiple(bbr, bbr->bw, NGTCP2_BBR_PROBE_RTT_CWND_GAIN); + uint64_t mpcwnd = min_pipe_cwnd(cstat->max_tx_udp_payload_size); + + return ngtcp2_max(probe_rtt_cwnd, mpcwnd); +} + +static void bbr_bound_cwnd_for_probe_rtt(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat) { + uint64_t probe_rtt_cwnd; + + if (bbr->state == NGTCP2_BBR2_STATE_PROBE_RTT) { + probe_rtt_cwnd = bbr_probe_rtt_cwnd(bbr, cstat); + + cstat->cwnd = ngtcp2_min(cstat->cwnd, probe_rtt_cwnd); + } +} + +static void bbr_set_cwnd(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack) { + uint64_t mpcwnd; + + bbr_update_max_inflight(bbr, cstat); + bbr_modulate_cwnd_for_recovery(bbr, cstat, ack); + + if (!bbr->packet_conservation) { + if (bbr->filled_pipe) { + cstat->cwnd += ack->bytes_delivered; + cstat->cwnd = ngtcp2_min(cstat->cwnd, bbr->max_inflight); + } else if (cstat->cwnd < bbr->max_inflight || + bbr->rst->delivered < bbr->initial_cwnd) { + cstat->cwnd += ack->bytes_delivered; + } + + mpcwnd = min_pipe_cwnd(cstat->max_tx_udp_payload_size); + cstat->cwnd = ngtcp2_max(cstat->cwnd, mpcwnd); + } + + bbr_bound_cwnd_for_probe_rtt(bbr, cstat); + bbr_bound_cwnd_for_model(bbr, cstat); +} + +static void bbr_bound_cwnd_for_model(ngtcp2_bbr2_cc *bbr, + ngtcp2_conn_stat *cstat) { + uint64_t cap = UINT64_MAX; + uint64_t mpcwnd = min_pipe_cwnd(cstat->max_tx_udp_payload_size); + + if (bbr_is_in_probe_bw_state(bbr) && + bbr->state != NGTCP2_BBR2_STATE_PROBE_BW_CRUISE) { + cap = bbr->inflight_hi; + } else if (bbr->state == NGTCP2_BBR2_STATE_PROBE_RTT || + bbr->state == NGTCP2_BBR2_STATE_PROBE_BW_CRUISE) { + cap = bbr_inflight_with_headroom(bbr, cstat); + } + + cap = ngtcp2_min(cap, bbr->inflight_lo); + cap = ngtcp2_max(cap, mpcwnd); + + cstat->cwnd = ngtcp2_min(cstat->cwnd, cap); +} + +static void bbr_set_send_quantum(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat) { + size_t send_quantum = + (size_t)(cstat->pacing_rate * (double)(bbr->min_rtt == UINT64_MAX + ? NGTCP2_MILLISECONDS + : bbr->min_rtt)); + (void)bbr; + + cstat->send_quantum = ngtcp2_min(send_quantum, 64 * 1024); + cstat->send_quantum = + ngtcp2_max(cstat->send_quantum, cstat->max_tx_udp_payload_size * 10); +} + +static int in_congestion_recovery(const ngtcp2_conn_stat *cstat, + ngtcp2_tstamp sent_time) { + return cstat->congestion_recovery_start_ts != UINT64_MAX && + sent_time <= cstat->congestion_recovery_start_ts; +} + +static void bbr_handle_recovery(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack) { + if (bbr->in_loss_recovery) { + if (ack->pkt_delivered >= bbr->congestion_recovery_next_round_delivered) { + bbr->packet_conservation = 0; + } + + if (!in_congestion_recovery(cstat, ack->largest_acked_sent_ts)) { + bbr->in_loss_recovery = 0; + bbr->packet_conservation = 0; + bbr_restore_cwnd(bbr, cstat); + } + + return; + } + + if (bbr->congestion_recovery_start_ts != UINT64_MAX) { + bbr->in_loss_recovery = 1; + bbr_save_cwnd(bbr, cstat); + cstat->cwnd = + cstat->bytes_in_flight + + ngtcp2_max(ack->bytes_delivered, cstat->max_tx_udp_payload_size); + + cstat->congestion_recovery_start_ts = bbr->congestion_recovery_start_ts; + bbr->congestion_recovery_start_ts = UINT64_MAX; + bbr->packet_conservation = 1; + bbr->congestion_recovery_next_round_delivered = bbr->rst->delivered; + bbr->prior_inflight_lo = bbr->inflight_lo; + bbr->prior_bw_lo = bbr->bw_lo; + } +} + +static void bbr2_cc_init(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat, + ngtcp2_rst *rst, ngtcp2_tstamp initial_ts, + ngtcp2_rand rand, const ngtcp2_rand_ctx *rand_ctx, + ngtcp2_log *log) { + bbr->ccb.log = log; + bbr->rst = rst; + bbr->rand = rand; + bbr->rand_ctx = *rand_ctx; + bbr->initial_cwnd = cstat->cwnd; + + bbr_on_init(bbr, cstat, initial_ts); +} + +static void bbr2_cc_free(ngtcp2_bbr2_cc *bbr) { (void)bbr; } + +static void bbr2_cc_on_pkt_acked(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_pkt *pkt, ngtcp2_tstamp ts) { + (void)ccx; + (void)cstat; + (void)pkt; + (void)ts; +} + +static void bbr2_cc_on_pkt_lost(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_pkt *pkt, ngtcp2_tstamp ts) { + ngtcp2_bbr2_cc *bbr = ngtcp2_struct_of(ccx->ccb, ngtcp2_bbr2_cc, ccb); + + bbr_update_on_loss(bbr, cstat, pkt, ts); +} + +static void bbr2_cc_congestion_event(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp sent_ts, ngtcp2_tstamp ts) { + ngtcp2_bbr2_cc *bbr = ngtcp2_struct_of(ccx->ccb, ngtcp2_bbr2_cc, ccb); + + if (!bbr->filled_pipe || bbr->in_loss_recovery || + bbr->congestion_recovery_start_ts != UINT64_MAX || + in_congestion_recovery(cstat, sent_ts)) { + return; + } + + bbr->congestion_recovery_start_ts = ts; +} + +static void bbr2_cc_on_spurious_congestion(ngtcp2_cc *ccx, + ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts) { + ngtcp2_bbr2_cc *bbr = ngtcp2_struct_of(ccx->ccb, ngtcp2_bbr2_cc, ccb); + (void)ts; + + bbr->congestion_recovery_start_ts = UINT64_MAX; + cstat->congestion_recovery_start_ts = UINT64_MAX; + + if (bbr->in_loss_recovery) { + bbr->in_loss_recovery = 0; + bbr->packet_conservation = 0; + bbr_restore_cwnd(bbr, cstat); + bbr->full_bw_count = 0; + bbr->loss_in_round = 0; + bbr->inflight_lo = ngtcp2_max(bbr->inflight_lo, bbr->prior_inflight_lo); + bbr->inflight_hi = ngtcp2_max(bbr->inflight_hi, bbr->prior_inflight_hi); + bbr->bw_lo = ngtcp2_max(bbr->bw_lo, bbr->prior_bw_lo); + } +} + +static void bbr2_cc_on_persistent_congestion(ngtcp2_cc *ccx, + ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts) { + ngtcp2_bbr2_cc *bbr = ngtcp2_struct_of(ccx->ccb, ngtcp2_bbr2_cc, ccb); + (void)ts; + + cstat->congestion_recovery_start_ts = UINT64_MAX; + bbr->congestion_recovery_start_ts = UINT64_MAX; + bbr->in_loss_recovery = 0; + bbr->packet_conservation = 0; + + bbr_save_cwnd(bbr, cstat); + cstat->cwnd = cstat->bytes_in_flight + cstat->max_tx_udp_payload_size; + cstat->cwnd = + ngtcp2_max(cstat->cwnd, min_pipe_cwnd(cstat->max_tx_udp_payload_size)); +} + +static void bbr2_cc_on_ack_recv(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts) { + ngtcp2_bbr2_cc *bbr = ngtcp2_struct_of(ccx->ccb, ngtcp2_bbr2_cc, ccb); + + bbr_update_on_ack(bbr, cstat, ack, ts); +} + +static void bbr2_cc_on_pkt_sent(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_pkt *pkt) { + ngtcp2_bbr2_cc *bbr = ngtcp2_struct_of(ccx->ccb, ngtcp2_bbr2_cc, ccb); + + bbr_on_transmit(bbr, cstat, pkt->sent_ts); +} + +static void bbr2_cc_new_rtt_sample(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts) { + (void)ccx; + (void)cstat; + (void)ts; +} + +static void bbr2_cc_reset(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts) { + ngtcp2_bbr2_cc *bbr = ngtcp2_struct_of(ccx->ccb, ngtcp2_bbr2_cc, ccb); + + bbr_on_init(bbr, cstat, ts); +} + +static void bbr2_cc_event(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat, + ngtcp2_cc_event_type event, ngtcp2_tstamp ts) { + (void)ccx; + (void)cstat; + (void)event; + (void)ts; +} + +int ngtcp2_cc_bbr2_cc_init(ngtcp2_cc *cc, ngtcp2_log *log, + ngtcp2_conn_stat *cstat, ngtcp2_rst *rst, + ngtcp2_tstamp initial_ts, ngtcp2_rand rand, + const ngtcp2_rand_ctx *rand_ctx, + const ngtcp2_mem *mem) { + ngtcp2_bbr2_cc *bbr; + + bbr = ngtcp2_mem_calloc(mem, 1, sizeof(ngtcp2_bbr2_cc)); + if (bbr == NULL) { + return NGTCP2_ERR_NOMEM; + } + + bbr2_cc_init(bbr, cstat, rst, initial_ts, rand, rand_ctx, log); + + cc->ccb = &bbr->ccb; + cc->on_pkt_acked = bbr2_cc_on_pkt_acked; + cc->on_pkt_lost = bbr2_cc_on_pkt_lost; + cc->congestion_event = bbr2_cc_congestion_event; + cc->on_spurious_congestion = bbr2_cc_on_spurious_congestion; + cc->on_persistent_congestion = bbr2_cc_on_persistent_congestion; + cc->on_ack_recv = bbr2_cc_on_ack_recv; + cc->on_pkt_sent = bbr2_cc_on_pkt_sent; + cc->new_rtt_sample = bbr2_cc_new_rtt_sample; + cc->reset = bbr2_cc_reset; + cc->event = bbr2_cc_event; + + return 0; +} + +void ngtcp2_cc_bbr2_cc_free(ngtcp2_cc *cc, const ngtcp2_mem *mem) { + ngtcp2_bbr2_cc *bbr = ngtcp2_struct_of(cc->ccb, ngtcp2_bbr2_cc, ccb); + + bbr2_cc_free(bbr); + ngtcp2_mem_free(mem, bbr); +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr2.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr2.h new file mode 100644 index 0000000..50dc05a --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr2.h @@ -0,0 +1,149 @@ +/* + * ngtcp2 + * + * Copyright (c) 2021 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_BBR2_H +#define NGTCP2_BBR2_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +#include "ngtcp2_cc.h" +#include "ngtcp2_window_filter.h" + +typedef struct ngtcp2_rst ngtcp2_rst; + +typedef enum ngtcp2_bbr2_state { + NGTCP2_BBR2_STATE_STARTUP, + NGTCP2_BBR2_STATE_DRAIN, + NGTCP2_BBR2_STATE_PROBE_BW_DOWN, + NGTCP2_BBR2_STATE_PROBE_BW_CRUISE, + NGTCP2_BBR2_STATE_PROBE_BW_REFILL, + NGTCP2_BBR2_STATE_PROBE_BW_UP, + NGTCP2_BBR2_STATE_PROBE_RTT, +} ngtcp2_bbr2_state; + +typedef enum ngtcp2_bbr2_ack_phase { + NGTCP2_BBR2_ACK_PHASE_ACKS_PROBE_STARTING, + NGTCP2_BBR2_ACK_PHASE_ACKS_PROBE_STOPPING, + NGTCP2_BBR2_ACK_PHASE_ACKS_PROBE_FEEDBACK, + NGTCP2_BBR2_ACK_PHASE_ACKS_REFILLING, +} ngtcp2_bbr2_ack_phase; + +/* + * ngtcp2_bbr2_cc is BBR v2 congestion controller, described in + * https://datatracker.ietf.org/doc/html/draft-cardwell-iccrg-bbr-congestion-control-01 + */ +typedef struct ngtcp2_bbr2_cc { + ngtcp2_cc_base ccb; + + uint64_t initial_cwnd; + ngtcp2_rst *rst; + ngtcp2_rand rand; + ngtcp2_rand_ctx rand_ctx; + + /* max_bw_filter for tracking the maximum recent delivery rate + samples for estimating max_bw. */ + ngtcp2_window_filter max_bw_filter; + + ngtcp2_window_filter extra_acked_filter; + + ngtcp2_duration min_rtt; + ngtcp2_tstamp min_rtt_stamp; + ngtcp2_tstamp probe_rtt_done_stamp; + int probe_rtt_round_done; + uint64_t prior_cwnd; + int idle_restart; + ngtcp2_tstamp extra_acked_interval_start; + uint64_t extra_acked_delivered; + + /* Congestion signals */ + int loss_in_round; + uint64_t bw_latest; + uint64_t inflight_latest; + + /* Lower bounds */ + uint64_t bw_lo; + uint64_t inflight_lo; + + /* Round counting */ + uint64_t next_round_delivered; + int round_start; + uint64_t round_count; + + /* Full pipe */ + int filled_pipe; + uint64_t full_bw; + size_t full_bw_count; + + /* Pacing rate */ + double pacing_gain; + + ngtcp2_bbr2_state state; + double cwnd_gain; + + int loss_round_start; + uint64_t loss_round_delivered; + uint64_t rounds_since_bw_probe; + uint64_t max_bw; + uint64_t bw; + uint64_t cycle_count; + uint64_t extra_acked; + uint64_t bytes_lost_in_round; + size_t loss_events_in_round; + uint64_t offload_budget; + uint64_t probe_up_cnt; + ngtcp2_tstamp cycle_stamp; + ngtcp2_bbr2_ack_phase ack_phase; + ngtcp2_duration bw_probe_wait; + int bw_probe_samples; + size_t bw_probe_up_rounds; + uint64_t bw_probe_up_acks; + uint64_t inflight_hi; + uint64_t bw_hi; + int probe_rtt_expired; + ngtcp2_duration probe_rtt_min_delay; + ngtcp2_tstamp probe_rtt_min_stamp; + int in_loss_recovery; + int packet_conservation; + uint64_t max_inflight; + ngtcp2_tstamp congestion_recovery_start_ts; + uint64_t congestion_recovery_next_round_delivered; + + uint64_t prior_inflight_lo; + uint64_t prior_inflight_hi; + uint64_t prior_bw_lo; +} ngtcp2_bbr2_cc; + +int ngtcp2_cc_bbr2_cc_init(ngtcp2_cc *cc, ngtcp2_log *log, + ngtcp2_conn_stat *cstat, ngtcp2_rst *rst, + ngtcp2_tstamp initial_ts, ngtcp2_rand rand, + const ngtcp2_rand_ctx *rand_ctx, + const ngtcp2_mem *mem); + +void ngtcp2_cc_bbr2_cc_free(ngtcp2_cc *cc, const ngtcp2_mem *mem); + +#endif /* NGTCP2_BBR2_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_buf.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_buf.c new file mode 100644 index 0000000..75326d6 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_buf.c @@ -0,0 +1,56 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_buf.h" +#include "ngtcp2_mem.h" + +void ngtcp2_buf_init(ngtcp2_buf *buf, uint8_t *begin, size_t len) { + buf->begin = buf->pos = buf->last = begin; + buf->end = begin + len; +} + +void ngtcp2_buf_reset(ngtcp2_buf *buf) { buf->pos = buf->last = buf->begin; } + +size_t ngtcp2_buf_cap(const ngtcp2_buf *buf) { + return (size_t)(buf->end - buf->begin); +} + +int ngtcp2_buf_chain_new(ngtcp2_buf_chain **pbufchain, size_t len, + const ngtcp2_mem *mem) { + *pbufchain = ngtcp2_mem_malloc(mem, sizeof(ngtcp2_buf_chain) + len); + if (*pbufchain == NULL) { + return NGTCP2_ERR_NOMEM; + } + + (*pbufchain)->next = NULL; + + ngtcp2_buf_init(&(*pbufchain)->buf, + (uint8_t *)(*pbufchain) + sizeof(ngtcp2_buf_chain), len); + + return 0; +} + +void ngtcp2_buf_chain_del(ngtcp2_buf_chain *bufchain, const ngtcp2_mem *mem) { + ngtcp2_mem_free(mem, bufchain); +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_buf.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_buf.h new file mode 100644 index 0000000..107d413 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_buf.h @@ -0,0 +1,108 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_BUF_H +#define NGTCP2_BUF_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +typedef struct ngtcp2_buf { + /* begin points to the beginning of the buffer. */ + uint8_t *begin; + /* end points to the one beyond of the last byte of the buffer */ + uint8_t *end; + /* pos pointers to the start of data. Typically, this points to the + point that next data should be read. Initially, it points to + |begin|. */ + uint8_t *pos; + /* last points to the one beyond of the last data of the buffer. + Typically, new data is written at this point. Initially, it + points to |begin|. */ + uint8_t *last; +} ngtcp2_buf; + +/* + * ngtcp2_buf_init initializes |buf| with the given buffer. + */ +void ngtcp2_buf_init(ngtcp2_buf *buf, uint8_t *begin, size_t len); + +/* + * ngtcp2_buf_reset resets pos and last fields to match begin field to + * make ngtcp2_buf_len(buf) return 0. + */ +void ngtcp2_buf_reset(ngtcp2_buf *buf); + +/* + * ngtcp2_buf_left returns the number of additional bytes which can be + * written to the underlying buffer. In other words, it returns + * buf->end - buf->last. + */ +#define ngtcp2_buf_left(BUF) (size_t)((BUF)->end - (BUF)->last) + +/* + * ngtcp2_buf_len returns the number of bytes left to read. In other + * words, it returns buf->last - buf->pos. + */ +#define ngtcp2_buf_len(BUF) (size_t)((BUF)->last - (BUF)->pos) + +/* + * ngtcp2_buf_cap returns the capacity of the buffer. In other words, + * it returns buf->end - buf->begin. + */ +size_t ngtcp2_buf_cap(const ngtcp2_buf *buf); + +/* + * ngtcp2_buf_chain is a linked list of ngtcp2_buf. + */ +typedef struct ngtcp2_buf_chain ngtcp2_buf_chain; + +struct ngtcp2_buf_chain { + ngtcp2_buf_chain *next; + ngtcp2_buf buf; +}; + +/* + * ngtcp2_buf_chain_new creates new ngtcp2_buf_chain and initializes + * the internal buffer with |len| bytes space. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory + */ +int ngtcp2_buf_chain_new(ngtcp2_buf_chain **pbufchain, size_t len, + const ngtcp2_mem *mem); + +/* + * ngtcp2_buf_chain_del deletes the resource allocated by |bufchain|. + * It also deletes the memory pointed by |bufchain|. + */ +void ngtcp2_buf_chain_del(ngtcp2_buf_chain *bufchain, const ngtcp2_mem *mem); + +#endif /* NGTCP2_BUF_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_cc.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_cc.c new file mode 100644 index 0000000..0536639 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_cc.c @@ -0,0 +1,615 @@ +/* + * ngtcp2 + * + * Copyright (c) 2018 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_cc.h" + +#include <assert.h> + +#if defined(_MSC_VER) +# include <intrin.h> +#endif + +#include "ngtcp2_log.h" +#include "ngtcp2_macro.h" +#include "ngtcp2_mem.h" +#include "ngtcp2_rcvry.h" + +uint64_t ngtcp2_cc_compute_initcwnd(size_t max_udp_payload_size) { + uint64_t n = 2 * max_udp_payload_size; + n = ngtcp2_max(n, 14720); + return ngtcp2_min(10 * max_udp_payload_size, n); +} + +ngtcp2_cc_pkt *ngtcp2_cc_pkt_init(ngtcp2_cc_pkt *pkt, int64_t pkt_num, + size_t pktlen, ngtcp2_pktns_id pktns_id, + ngtcp2_tstamp sent_ts, uint64_t lost, + uint64_t tx_in_flight, int is_app_limited) { + pkt->pkt_num = pkt_num; + pkt->pktlen = pktlen; + pkt->pktns_id = pktns_id; + pkt->sent_ts = sent_ts; + pkt->lost = lost; + pkt->tx_in_flight = tx_in_flight; + pkt->is_app_limited = is_app_limited; + + return pkt; +} + +static void reno_cc_reset(ngtcp2_reno_cc *cc) { + cc->max_delivery_rate_sec = 0; + cc->target_cwnd = 0; + cc->pending_add = 0; +} + +void ngtcp2_reno_cc_init(ngtcp2_reno_cc *cc, ngtcp2_log *log) { + cc->ccb.log = log; + reno_cc_reset(cc); +} + +void ngtcp2_reno_cc_free(ngtcp2_reno_cc *cc) { (void)cc; } + +int ngtcp2_cc_reno_cc_init(ngtcp2_cc *cc, ngtcp2_log *log, + const ngtcp2_mem *mem) { + ngtcp2_reno_cc *reno_cc; + + reno_cc = ngtcp2_mem_calloc(mem, 1, sizeof(ngtcp2_reno_cc)); + if (reno_cc == NULL) { + return NGTCP2_ERR_NOMEM; + } + + ngtcp2_reno_cc_init(reno_cc, log); + + cc->ccb = &reno_cc->ccb; + cc->on_pkt_acked = ngtcp2_cc_reno_cc_on_pkt_acked; + cc->congestion_event = ngtcp2_cc_reno_cc_congestion_event; + cc->on_persistent_congestion = ngtcp2_cc_reno_cc_on_persistent_congestion; + cc->on_ack_recv = ngtcp2_cc_reno_cc_on_ack_recv; + cc->reset = ngtcp2_cc_reno_cc_reset; + + return 0; +} + +void ngtcp2_cc_reno_cc_free(ngtcp2_cc *cc, const ngtcp2_mem *mem) { + ngtcp2_reno_cc *reno_cc = ngtcp2_struct_of(cc->ccb, ngtcp2_reno_cc, ccb); + + ngtcp2_reno_cc_free(reno_cc); + ngtcp2_mem_free(mem, reno_cc); +} + +static int in_congestion_recovery(const ngtcp2_conn_stat *cstat, + ngtcp2_tstamp sent_time) { + return cstat->congestion_recovery_start_ts != UINT64_MAX && + sent_time <= cstat->congestion_recovery_start_ts; +} + +void ngtcp2_cc_reno_cc_on_pkt_acked(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_pkt *pkt, + ngtcp2_tstamp ts) { + ngtcp2_reno_cc *cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_reno_cc, ccb); + uint64_t m; + (void)ts; + + if (in_congestion_recovery(cstat, pkt->sent_ts)) { + return; + } + + if (cc->target_cwnd && cc->target_cwnd < cstat->cwnd) { + return; + } + + if (cstat->cwnd < cstat->ssthresh) { + cstat->cwnd += pkt->pktlen; + ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV, + "pkn=%" PRId64 " acked, slow start cwnd=%" PRIu64, + pkt->pkt_num, cstat->cwnd); + return; + } + + m = cstat->max_tx_udp_payload_size * pkt->pktlen + cc->pending_add; + cc->pending_add = m % cstat->cwnd; + + cstat->cwnd += m / cstat->cwnd; +} + +void ngtcp2_cc_reno_cc_congestion_event(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp sent_ts, + ngtcp2_tstamp ts) { + ngtcp2_reno_cc *cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_reno_cc, ccb); + uint64_t min_cwnd; + + if (in_congestion_recovery(cstat, sent_ts)) { + return; + } + + cstat->congestion_recovery_start_ts = ts; + cstat->cwnd >>= NGTCP2_LOSS_REDUCTION_FACTOR_BITS; + min_cwnd = 2 * cstat->max_tx_udp_payload_size; + cstat->cwnd = ngtcp2_max(cstat->cwnd, min_cwnd); + cstat->ssthresh = cstat->cwnd; + + cc->pending_add = 0; + + ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV, + "reduce cwnd because of packet loss cwnd=%" PRIu64, + cstat->cwnd); +} + +void ngtcp2_cc_reno_cc_on_persistent_congestion(ngtcp2_cc *ccx, + ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts) { + (void)ccx; + (void)ts; + + cstat->cwnd = 2 * cstat->max_tx_udp_payload_size; + cstat->congestion_recovery_start_ts = UINT64_MAX; +} + +void ngtcp2_cc_reno_cc_on_ack_recv(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts) { + ngtcp2_reno_cc *cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_reno_cc, ccb); + uint64_t target_cwnd, initcwnd; + (void)ack; + (void)ts; + + /* TODO Use sliding window for min rtt measurement */ + /* TODO Use sliding window */ + cc->max_delivery_rate_sec = + ngtcp2_max(cc->max_delivery_rate_sec, cstat->delivery_rate_sec); + + if (cstat->min_rtt != UINT64_MAX && cc->max_delivery_rate_sec) { + target_cwnd = cc->max_delivery_rate_sec * cstat->min_rtt / NGTCP2_SECONDS; + initcwnd = ngtcp2_cc_compute_initcwnd(cstat->max_tx_udp_payload_size); + cc->target_cwnd = ngtcp2_max(initcwnd, target_cwnd) * 289 / 100; + + ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV, + "target_cwnd=%" PRIu64 " max_delivery_rate_sec=%" PRIu64 + " min_rtt=%" PRIu64, + cc->target_cwnd, cc->max_delivery_rate_sec, cstat->min_rtt); + } +} + +void ngtcp2_cc_reno_cc_reset(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts) { + ngtcp2_reno_cc *cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_reno_cc, ccb); + (void)cstat; + (void)ts; + + reno_cc_reset(cc); +} + +static void cubic_cc_reset(ngtcp2_cubic_cc *cc) { + cc->max_delivery_rate_sec = 0; + cc->target_cwnd = 0; + cc->w_last_max = 0; + cc->w_tcp = 0; + cc->origin_point = 0; + cc->epoch_start = UINT64_MAX; + cc->k = 0; + + cc->prior.cwnd = 0; + cc->prior.ssthresh = 0; + cc->prior.w_last_max = 0; + cc->prior.w_tcp = 0; + cc->prior.origin_point = 0; + cc->prior.epoch_start = UINT64_MAX; + cc->prior.k = 0; + + cc->rtt_sample_count = 0; + cc->current_round_min_rtt = UINT64_MAX; + cc->last_round_min_rtt = UINT64_MAX; + cc->window_end = -1; +} + +void ngtcp2_cubic_cc_init(ngtcp2_cubic_cc *cc, ngtcp2_log *log) { + cc->ccb.log = log; + cubic_cc_reset(cc); +} + +void ngtcp2_cubic_cc_free(ngtcp2_cubic_cc *cc) { (void)cc; } + +int ngtcp2_cc_cubic_cc_init(ngtcp2_cc *cc, ngtcp2_log *log, + const ngtcp2_mem *mem) { + ngtcp2_cubic_cc *cubic_cc; + + cubic_cc = ngtcp2_mem_calloc(mem, 1, sizeof(ngtcp2_cubic_cc)); + if (cubic_cc == NULL) { + return NGTCP2_ERR_NOMEM; + } + + ngtcp2_cubic_cc_init(cubic_cc, log); + + cc->ccb = &cubic_cc->ccb; + cc->on_pkt_acked = ngtcp2_cc_cubic_cc_on_pkt_acked; + cc->congestion_event = ngtcp2_cc_cubic_cc_congestion_event; + cc->on_spurious_congestion = ngtcp2_cc_cubic_cc_on_spurious_congestion; + cc->on_persistent_congestion = ngtcp2_cc_cubic_cc_on_persistent_congestion; + cc->on_ack_recv = ngtcp2_cc_cubic_cc_on_ack_recv; + cc->on_pkt_sent = ngtcp2_cc_cubic_cc_on_pkt_sent; + cc->new_rtt_sample = ngtcp2_cc_cubic_cc_new_rtt_sample; + cc->reset = ngtcp2_cc_cubic_cc_reset; + cc->event = ngtcp2_cc_cubic_cc_event; + + return 0; +} + +void ngtcp2_cc_cubic_cc_free(ngtcp2_cc *cc, const ngtcp2_mem *mem) { + ngtcp2_cubic_cc *cubic_cc = ngtcp2_struct_of(cc->ccb, ngtcp2_cubic_cc, ccb); + + ngtcp2_cubic_cc_free(cubic_cc); + ngtcp2_mem_free(mem, cubic_cc); +} + +static uint64_t ngtcp2_cbrt(uint64_t n) { + int d; + uint64_t a; + + if (n == 0) { + return 0; + } + +#if defined(_MSC_VER) +# if defined(_M_X64) + d = (int)__lzcnt64(n); +# elif defined(_M_ARM64) + { + unsigned long index; + d = sizeof(uint64_t) * CHAR_BIT; + if (_BitScanReverse64(&index, n)) { + d = d - 1 - index; + } + } +# else + if ((n >> 32) != 0) { + d = __lzcnt((unsigned int)(n >> 32)); + } else { + d = 32 + __lzcnt((unsigned int)n); + } +# endif +#else + d = __builtin_clzll(n); +#endif + a = 1ULL << ((64 - d) / 3 + 1); + + for (; a * a * a > n;) { + a = (2 * a + n / a / a) / 3; + } + return a; +} + +/* HyStart++ constants */ +#define NGTCP2_HS_MIN_SSTHRESH 16 +#define NGTCP2_HS_N_RTT_SAMPLE 8 +#define NGTCP2_HS_MIN_ETA (4 * NGTCP2_MILLISECONDS) +#define NGTCP2_HS_MAX_ETA (16 * NGTCP2_MILLISECONDS) + +void ngtcp2_cc_cubic_cc_on_pkt_acked(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_pkt *pkt, + ngtcp2_tstamp ts) { + ngtcp2_cubic_cc *cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_cubic_cc, ccb); + ngtcp2_duration t, eta; + uint64_t target, cwnd_thres; + uint64_t tx, kx, time_delta, delta; + uint64_t add, tcp_add; + uint64_t m; + + if (pkt->pktns_id == NGTCP2_PKTNS_ID_APPLICATION && cc->window_end != -1 && + cc->window_end <= pkt->pkt_num) { + cc->window_end = -1; + } + + if (in_congestion_recovery(cstat, pkt->sent_ts)) { + return; + } + + if (cc->target_cwnd && cc->target_cwnd < cstat->cwnd) { + return; + } + + if (cstat->cwnd < cstat->ssthresh) { + /* slow-start */ + cstat->cwnd += pkt->pktlen; + + ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV, + "pkn=%" PRId64 " acked, slow start cwnd=%" PRIu64, + pkt->pkt_num, cstat->cwnd); + + if (cc->last_round_min_rtt != UINT64_MAX && + cc->current_round_min_rtt != UINT64_MAX && + cstat->cwnd >= + NGTCP2_HS_MIN_SSTHRESH * cstat->max_tx_udp_payload_size && + cc->rtt_sample_count >= NGTCP2_HS_N_RTT_SAMPLE) { + eta = cc->last_round_min_rtt / 8; + + if (eta < NGTCP2_HS_MIN_ETA) { + eta = NGTCP2_HS_MIN_ETA; + } else if (eta > NGTCP2_HS_MAX_ETA) { + eta = NGTCP2_HS_MAX_ETA; + } + + if (cc->current_round_min_rtt >= cc->last_round_min_rtt + eta) { + ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV, + "HyStart++ exit slow start"); + + cc->w_last_max = cstat->cwnd; + cstat->ssthresh = cstat->cwnd; + } + } + + return; + } + + /* congestion avoidance */ + + if (cc->epoch_start == UINT64_MAX) { + cc->epoch_start = ts; + if (cstat->cwnd < cc->w_last_max) { + cc->k = ngtcp2_cbrt((cc->w_last_max - cstat->cwnd) * 10 / 4 / + cstat->max_tx_udp_payload_size); + cc->origin_point = cc->w_last_max; + } else { + cc->k = 0; + cc->origin_point = cstat->cwnd; + } + + cc->w_tcp = cstat->cwnd; + + ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV, + "cubic-ca epoch_start=%" PRIu64 " k=%" PRIu64 + " origin_point=%" PRIu64, + cc->epoch_start, cc->k, cc->origin_point); + + cc->pending_add = 0; + cc->pending_w_add = 0; + } + + t = ts - cc->epoch_start; + + tx = (t << 10) / NGTCP2_SECONDS; + kx = (cc->k << 10); + + if (tx > kx) { + time_delta = tx - kx; + } else { + time_delta = kx - tx; + } + + delta = cstat->max_tx_udp_payload_size * + ((((time_delta * time_delta) >> 10) * time_delta) >> 10) * 4 / 10; + delta >>= 10; + + if (tx > kx) { + target = cc->origin_point + delta; + } else { + target = cc->origin_point - delta; + } + + cwnd_thres = + (target * (((t + cstat->smoothed_rtt) << 10) / NGTCP2_SECONDS)) >> 10; + if (cwnd_thres < cstat->cwnd) { + target = cstat->cwnd; + } else if (2 * cwnd_thres > 3 * cstat->cwnd) { + target = cstat->cwnd * 3 / 2; + } else { + target = cwnd_thres; + } + + if (target > cstat->cwnd) { + m = cc->pending_add + + cstat->max_tx_udp_payload_size * (target - cstat->cwnd); + add = m / cstat->cwnd; + cc->pending_add = m % cstat->cwnd; + } else { + m = cc->pending_add + cstat->max_tx_udp_payload_size; + add = m / (100 * cstat->cwnd); + cc->pending_add = m % (100 * cstat->cwnd); + } + + m = cc->pending_w_add + cstat->max_tx_udp_payload_size * pkt->pktlen; + + cc->w_tcp += m / cstat->cwnd; + cc->pending_w_add = m % cstat->cwnd; + + if (cc->w_tcp > cstat->cwnd) { + tcp_add = cstat->max_tx_udp_payload_size * (cc->w_tcp - cstat->cwnd) / + cstat->cwnd; + if (tcp_add > add) { + add = tcp_add; + } + } + + cstat->cwnd += add; + + ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV, + "pkn=%" PRId64 " acked, cubic-ca cwnd=%" PRIu64 " t=%" PRIu64 + " k=%" PRIi64 " time_delta=%" PRIu64 " delta=%" PRIu64 + " target=%" PRIu64 " w_tcp=%" PRIu64, + pkt->pkt_num, cstat->cwnd, t, cc->k, time_delta >> 4, delta, + target, cc->w_tcp); +} + +void ngtcp2_cc_cubic_cc_congestion_event(ngtcp2_cc *ccx, + ngtcp2_conn_stat *cstat, + ngtcp2_tstamp sent_ts, + ngtcp2_tstamp ts) { + ngtcp2_cubic_cc *cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_cubic_cc, ccb); + uint64_t min_cwnd; + + if (in_congestion_recovery(cstat, sent_ts)) { + return; + } + + if (cc->prior.cwnd < cstat->cwnd) { + cc->prior.cwnd = cstat->cwnd; + cc->prior.ssthresh = cstat->ssthresh; + cc->prior.w_last_max = cc->w_last_max; + cc->prior.w_tcp = cc->w_tcp; + cc->prior.origin_point = cc->origin_point; + cc->prior.epoch_start = cc->epoch_start; + cc->prior.k = cc->k; + } + + cstat->congestion_recovery_start_ts = ts; + + cc->epoch_start = UINT64_MAX; + if (cstat->cwnd < cc->w_last_max) { + cc->w_last_max = cstat->cwnd * 17 / 10 / 2; + } else { + cc->w_last_max = cstat->cwnd; + } + + min_cwnd = 2 * cstat->max_tx_udp_payload_size; + cstat->ssthresh = cstat->cwnd * 7 / 10; + cstat->ssthresh = ngtcp2_max(cstat->ssthresh, min_cwnd); + cstat->cwnd = cstat->ssthresh; + + ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV, + "reduce cwnd because of packet loss cwnd=%" PRIu64, + cstat->cwnd); +} + +void ngtcp2_cc_cubic_cc_on_spurious_congestion(ngtcp2_cc *ccx, + ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts) { + ngtcp2_cubic_cc *cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_cubic_cc, ccb); + (void)ts; + + if (cstat->cwnd >= cc->prior.cwnd) { + return; + } + + cstat->congestion_recovery_start_ts = UINT64_MAX; + + cstat->cwnd = cc->prior.cwnd; + cstat->ssthresh = cc->prior.ssthresh; + cc->w_last_max = cc->prior.w_last_max; + cc->w_tcp = cc->prior.w_tcp; + cc->origin_point = cc->prior.origin_point; + cc->epoch_start = cc->prior.epoch_start; + cc->k = cc->prior.k; + + cc->prior.cwnd = 0; + cc->prior.ssthresh = 0; + cc->prior.w_last_max = 0; + cc->prior.w_tcp = 0; + cc->prior.origin_point = 0; + cc->prior.epoch_start = UINT64_MAX; + cc->prior.k = 0; + + ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV, + "spurious congestion is detected and congestion state is " + "restored cwnd=%" PRIu64, + cstat->cwnd); +} + +void ngtcp2_cc_cubic_cc_on_persistent_congestion(ngtcp2_cc *ccx, + ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts) { + (void)ccx; + (void)ts; + + cstat->cwnd = 2 * cstat->max_tx_udp_payload_size; + cstat->congestion_recovery_start_ts = UINT64_MAX; +} + +void ngtcp2_cc_cubic_cc_on_ack_recv(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack, + ngtcp2_tstamp ts) { + ngtcp2_cubic_cc *cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_cubic_cc, ccb); + uint64_t target_cwnd, initcwnd; + (void)ack; + (void)ts; + + /* TODO Use sliding window for min rtt measurement */ + /* TODO Use sliding window */ + cc->max_delivery_rate_sec = + ngtcp2_max(cc->max_delivery_rate_sec, cstat->delivery_rate_sec); + + if (cstat->min_rtt != UINT64_MAX && cc->max_delivery_rate_sec) { + target_cwnd = cc->max_delivery_rate_sec * cstat->min_rtt / NGTCP2_SECONDS; + initcwnd = ngtcp2_cc_compute_initcwnd(cstat->max_tx_udp_payload_size); + cc->target_cwnd = ngtcp2_max(initcwnd, target_cwnd) * 289 / 100; + + ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV, + "target_cwnd=%" PRIu64 " max_delivery_rate_sec=%" PRIu64 + " min_rtt=%" PRIu64, + cc->target_cwnd, cc->max_delivery_rate_sec, cstat->min_rtt); + } +} + +void ngtcp2_cc_cubic_cc_on_pkt_sent(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_pkt *pkt) { + ngtcp2_cubic_cc *cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_cubic_cc, ccb); + (void)cstat; + + if (pkt->pktns_id != NGTCP2_PKTNS_ID_APPLICATION || cc->window_end != -1) { + return; + } + + cc->window_end = pkt->pkt_num; + cc->last_round_min_rtt = cc->current_round_min_rtt; + cc->current_round_min_rtt = UINT64_MAX; + cc->rtt_sample_count = 0; +} + +void ngtcp2_cc_cubic_cc_new_rtt_sample(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts) { + ngtcp2_cubic_cc *cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_cubic_cc, ccb); + (void)ts; + + if (cc->window_end == -1) { + return; + } + + cc->current_round_min_rtt = + ngtcp2_min(cc->current_round_min_rtt, cstat->latest_rtt); + ++cc->rtt_sample_count; +} + +void ngtcp2_cc_cubic_cc_reset(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts) { + ngtcp2_cubic_cc *cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_cubic_cc, ccb); + (void)cstat; + (void)ts; + + cubic_cc_reset(cc); +} + +void ngtcp2_cc_cubic_cc_event(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat, + ngtcp2_cc_event_type event, ngtcp2_tstamp ts) { + ngtcp2_cubic_cc *cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_cubic_cc, ccb); + ngtcp2_tstamp last_ts; + + if (event != NGTCP2_CC_EVENT_TYPE_TX_START || cc->epoch_start == UINT64_MAX) { + return; + } + + last_ts = cstat->last_tx_pkt_ts[NGTCP2_PKTNS_ID_APPLICATION]; + if (last_ts == UINT64_MAX || last_ts <= cc->epoch_start) { + return; + } + + assert(ts >= last_ts); + + cc->epoch_start += ts - last_ts; +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_cc.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_cc.h new file mode 100644 index 0000000..6d9e0c2 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_cc.h @@ -0,0 +1,421 @@ +/* + * ngtcp2 + * + * Copyright (c) 2018 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_CC_H +#define NGTCP2_CC_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +#define NGTCP2_LOSS_REDUCTION_FACTOR_BITS 1 +#define NGTCP2_PERSISTENT_CONGESTION_THRESHOLD 3 + +typedef struct ngtcp2_log ngtcp2_log; + +/** + * @struct + * + * :type:`ngtcp2_cc_base` is the base structure of custom congestion + * control algorithm. It must be the first field of custom congestion + * controller. + */ +typedef struct ngtcp2_cc_base { + /** + * :member:`log` is ngtcp2 library internal logger. + */ + ngtcp2_log *log; +} ngtcp2_cc_base; + +/** + * @struct + * + * :type:`ngtcp2_cc_pkt` is a convenient structure to include + * acked/lost/sent packet. + */ +typedef struct ngtcp2_cc_pkt { + /** + * :member:`pkt_num` is the packet number + */ + int64_t pkt_num; + /** + * :member:`pktlen` is the length of packet. + */ + size_t pktlen; + /** + * :member:`pktns_id` is the ID of packet number space which this + * packet belongs to. + */ + ngtcp2_pktns_id pktns_id; + /** + * :member:`sent_ts` is the timestamp when packet is sent. + */ + ngtcp2_tstamp sent_ts; + /** + * :member:`lost` is the number of bytes lost when this packet was + * sent. + */ + uint64_t lost; + /** + * :member:`tx_in_flight` is the bytes in flight when this packet + * was sent. + */ + uint64_t tx_in_flight; + /** + * :member:`is_app_limited` is nonzero if the connection is + * app-limited when this packet was sent. + */ + int is_app_limited; +} ngtcp2_cc_pkt; + +/** + * @struct + * + * :type:`ngtcp2_cc_ack` is a convenient structure which stores + * acknowledged and lost bytes. + */ +typedef struct ngtcp2_cc_ack { + /** + * :member:`prior_bytes_in_flight` is the in-flight bytes before + * processing this ACK. + */ + uint64_t prior_bytes_in_flight; + /** + * :member:`bytes_delivered` is the number of bytes acknowledged. + */ + uint64_t bytes_delivered; + /** + * :member:`bytes_lost` is the number of bytes declared lost. + */ + uint64_t bytes_lost; + /** + * :member:`pkt_delivered` is the cumulative acknowledged bytes when + * the last packet acknowledged by this ACK was sent. + */ + uint64_t pkt_delivered; + /** + * :member:`largest_acked_sent_ts` is the time when the largest + * acknowledged packet was sent. + */ + ngtcp2_tstamp largest_acked_sent_ts; + /** + * :member:`rtt` is the RTT sample. It is UINT64_MAX if no RTT + * sample is available. + */ + ngtcp2_duration rtt; +} ngtcp2_cc_ack; + +typedef struct ngtcp2_cc ngtcp2_cc; + +/** + * @functypedef + * + * :type:`ngtcp2_cc_on_pkt_acked` is a callback function which is + * called with an acknowledged packet. + */ +typedef void (*ngtcp2_cc_on_pkt_acked)(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_pkt *pkt, + ngtcp2_tstamp ts); + +/** + * @functypedef + * + * :type:`ngtcp2_cc_on_pkt_lost` is a callback function which is + * called with a lost packet. + */ +typedef void (*ngtcp2_cc_on_pkt_lost)(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_pkt *pkt, + ngtcp2_tstamp ts); +/** + * @functypedef + * + * :type:`ngtcp2_cc_congestion_event` is a callback function which is + * called when congestion event happens (e.g., when packet is lost). + */ +typedef void (*ngtcp2_cc_congestion_event)(ngtcp2_cc *cc, + ngtcp2_conn_stat *cstat, + ngtcp2_tstamp sent_ts, + ngtcp2_tstamp ts); + +/** + * @functypedef + * + * :type:`ngtcp2_cc_on_spurious_congestion` is a callback function + * which is called when a spurious congestion is detected. + */ +typedef void (*ngtcp2_cc_on_spurious_congestion)(ngtcp2_cc *cc, + ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts); + +/** + * @functypedef + * + * :type:`ngtcp2_cc_on_persistent_congestion` is a callback function + * which is called when persistent congestion is established. + */ +typedef void (*ngtcp2_cc_on_persistent_congestion)(ngtcp2_cc *cc, + ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts); + +/** + * @functypedef + * + * :type:`ngtcp2_cc_on_ack_recv` is a callback function which is + * called when an acknowledgement is received. + */ +typedef void (*ngtcp2_cc_on_ack_recv)(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack, + ngtcp2_tstamp ts); + +/** + * @functypedef + * + * :type:`ngtcp2_cc_on_pkt_sent` is a callback function which is + * called when an ack-eliciting packet is sent. + */ +typedef void (*ngtcp2_cc_on_pkt_sent)(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_pkt *pkt); + +/** + * @functypedef + * + * :type:`ngtcp2_cc_new_rtt_sample` is a callback function which is + * called when new RTT sample is obtained. + */ +typedef void (*ngtcp2_cc_new_rtt_sample)(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts); + +/** + * @functypedef + * + * :type:`ngtcp2_cc_reset` is a callback function which is called when + * congestion state must be reset. + */ +typedef void (*ngtcp2_cc_reset)(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts); + +/** + * @enum + * + * :type:`ngtcp2_cc_event_type` defines congestion control events. + */ +typedef enum ngtcp2_cc_event_type { + /** + * :enum:`NGTCP2_CC_EVENT_TX_START` occurs when ack-eliciting packet + * is sent and no other ack-eliciting packet is present. + */ + NGTCP2_CC_EVENT_TYPE_TX_START +} ngtcp2_cc_event_type; + +/** + * @functypedef + * + * :type:`ngtcp2_cc_event` is a callback function which is called when + * a specific event happens. + */ +typedef void (*ngtcp2_cc_event)(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, + ngtcp2_cc_event_type event, ngtcp2_tstamp ts); + +/** + * @struct + * + * :type:`ngtcp2_cc` is congestion control algorithm interface to + * allow custom implementation. + */ +typedef struct ngtcp2_cc { + /** + * :member:`ccb` is a pointer to :type:`ngtcp2_cc_base` which + * usually contains a state. + */ + ngtcp2_cc_base *ccb; + /** + * :member:`on_pkt_acked` is a callback function which is called + * when a packet is acknowledged. + */ + ngtcp2_cc_on_pkt_acked on_pkt_acked; + /** + * :member:`on_pkt_lost` is a callback function which is called when + * a packet is lost. + */ + ngtcp2_cc_on_pkt_lost on_pkt_lost; + /** + * :member:`congestion_event` is a callback function which is called + * when congestion event happens (.e.g, packet is lost). + */ + ngtcp2_cc_congestion_event congestion_event; + /** + * :member:`on_spurious_congestion` is a callback function which is + * called when a spurious congestion is detected. + */ + ngtcp2_cc_on_spurious_congestion on_spurious_congestion; + /** + * :member:`on_persistent_congestion` is a callback function which + * is called when persistent congestion is established. + */ + ngtcp2_cc_on_persistent_congestion on_persistent_congestion; + /** + * :member:`on_ack_recv` is a callback function which is called when + * an acknowledgement is received. + */ + ngtcp2_cc_on_ack_recv on_ack_recv; + /** + * :member:`on_pkt_sent` is a callback function which is called when + * ack-eliciting packet is sent. + */ + ngtcp2_cc_on_pkt_sent on_pkt_sent; + /** + * :member:`new_rtt_sample` is a callback function which is called + * when new RTT sample is obtained. + */ + ngtcp2_cc_new_rtt_sample new_rtt_sample; + /** + * :member:`reset` is a callback function which is called when + * congestion control state must be reset. + */ + ngtcp2_cc_reset reset; + /** + * :member:`event` is a callback function which is called when a + * specific event happens. + */ + ngtcp2_cc_event event; +} ngtcp2_cc; + +/* + * ngtcp2_cc_compute_initcwnd computes initial cwnd. + */ +uint64_t ngtcp2_cc_compute_initcwnd(size_t max_packet_size); + +ngtcp2_cc_pkt *ngtcp2_cc_pkt_init(ngtcp2_cc_pkt *pkt, int64_t pkt_num, + size_t pktlen, ngtcp2_pktns_id pktns_id, + ngtcp2_tstamp sent_ts, uint64_t lost, + uint64_t tx_in_flight, int is_app_limited); + +/* ngtcp2_reno_cc is the RENO congestion controller. */ +typedef struct ngtcp2_reno_cc { + ngtcp2_cc_base ccb; + uint64_t max_delivery_rate_sec; + uint64_t target_cwnd; + uint64_t pending_add; +} ngtcp2_reno_cc; + +int ngtcp2_cc_reno_cc_init(ngtcp2_cc *cc, ngtcp2_log *log, + const ngtcp2_mem *mem); + +void ngtcp2_cc_reno_cc_free(ngtcp2_cc *cc, const ngtcp2_mem *mem); + +void ngtcp2_reno_cc_init(ngtcp2_reno_cc *cc, ngtcp2_log *log); + +void ngtcp2_reno_cc_free(ngtcp2_reno_cc *cc); + +void ngtcp2_cc_reno_cc_on_pkt_acked(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_pkt *pkt, ngtcp2_tstamp ts); + +void ngtcp2_cc_reno_cc_congestion_event(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp sent_ts, + ngtcp2_tstamp ts); + +void ngtcp2_cc_reno_cc_on_persistent_congestion(ngtcp2_cc *cc, + ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts); + +void ngtcp2_cc_reno_cc_on_ack_recv(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts); + +void ngtcp2_cc_reno_cc_reset(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts); + +/* ngtcp2_cubic_cc is CUBIC congestion controller. */ +typedef struct ngtcp2_cubic_cc { + ngtcp2_cc_base ccb; + uint64_t max_delivery_rate_sec; + uint64_t target_cwnd; + uint64_t w_last_max; + uint64_t w_tcp; + uint64_t origin_point; + ngtcp2_tstamp epoch_start; + uint64_t k; + /* prior stores the congestion state when a congestion event occurs + in order to restore the state when it turns out that the event is + spurious. */ + struct { + uint64_t cwnd; + uint64_t ssthresh; + uint64_t w_last_max; + uint64_t w_tcp; + uint64_t origin_point; + ngtcp2_tstamp epoch_start; + uint64_t k; + } prior; + /* HyStart++ variables */ + size_t rtt_sample_count; + uint64_t current_round_min_rtt; + uint64_t last_round_min_rtt; + int64_t window_end; + uint64_t pending_add; + uint64_t pending_w_add; +} ngtcp2_cubic_cc; + +int ngtcp2_cc_cubic_cc_init(ngtcp2_cc *cc, ngtcp2_log *log, + const ngtcp2_mem *mem); + +void ngtcp2_cc_cubic_cc_free(ngtcp2_cc *cc, const ngtcp2_mem *mem); + +void ngtcp2_cubic_cc_init(ngtcp2_cubic_cc *cc, ngtcp2_log *log); + +void ngtcp2_cubic_cc_free(ngtcp2_cubic_cc *cc); + +void ngtcp2_cc_cubic_cc_on_pkt_acked(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_pkt *pkt, + ngtcp2_tstamp ts); + +void ngtcp2_cc_cubic_cc_congestion_event(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp sent_ts, + ngtcp2_tstamp ts); + +void ngtcp2_cc_cubic_cc_on_spurious_congestion(ngtcp2_cc *ccx, + ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts); + +void ngtcp2_cc_cubic_cc_on_persistent_congestion(ngtcp2_cc *cc, + ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts); + +void ngtcp2_cc_cubic_cc_on_ack_recv(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts); + +void ngtcp2_cc_cubic_cc_on_pkt_sent(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, + const ngtcp2_cc_pkt *pkt); + +void ngtcp2_cc_cubic_cc_new_rtt_sample(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts); + +void ngtcp2_cc_cubic_cc_reset(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts); + +void ngtcp2_cc_cubic_cc_event(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, + ngtcp2_cc_event_type event, ngtcp2_tstamp ts); + +#endif /* NGTCP2_CC_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_cid.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_cid.c new file mode 100644 index 0000000..f3b92b5 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_cid.c @@ -0,0 +1,147 @@ +/* + * ngtcp2 + * + * Copyright (c) 2018 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_cid.h" + +#include <assert.h> +#include <string.h> + +#include "ngtcp2_path.h" +#include "ngtcp2_str.h" + +void ngtcp2_cid_zero(ngtcp2_cid *cid) { memset(cid, 0, sizeof(*cid)); } + +void ngtcp2_cid_init(ngtcp2_cid *cid, const uint8_t *data, size_t datalen) { + assert(datalen <= NGTCP2_MAX_CIDLEN); + + cid->datalen = datalen; + if (datalen) { + ngtcp2_cpymem(cid->data, data, datalen); + } +} + +int ngtcp2_cid_eq(const ngtcp2_cid *cid, const ngtcp2_cid *other) { + return cid->datalen == other->datalen && + 0 == memcmp(cid->data, other->data, cid->datalen); +} + +int ngtcp2_cid_less(const ngtcp2_cid *lhs, const ngtcp2_cid *rhs) { + int s = lhs->datalen < rhs->datalen; + size_t n = s ? lhs->datalen : rhs->datalen; + int c = memcmp(lhs->data, rhs->data, n); + + return c < 0 || (c == 0 && s); +} + +int ngtcp2_cid_empty(const ngtcp2_cid *cid) { return cid->datalen == 0; } + +void ngtcp2_scid_init(ngtcp2_scid *scid, uint64_t seq, const ngtcp2_cid *cid) { + scid->pe.index = NGTCP2_PQ_BAD_INDEX; + scid->seq = seq; + scid->cid = *cid; + scid->retired_ts = UINT64_MAX; + scid->flags = NGTCP2_SCID_FLAG_NONE; +} + +void ngtcp2_scid_copy(ngtcp2_scid *dest, const ngtcp2_scid *src) { + ngtcp2_scid_init(dest, src->seq, &src->cid); + dest->retired_ts = src->retired_ts; + dest->flags = src->flags; +} + +void ngtcp2_dcid_init(ngtcp2_dcid *dcid, uint64_t seq, const ngtcp2_cid *cid, + const uint8_t *token) { + dcid->seq = seq; + dcid->cid = *cid; + if (token) { + memcpy(dcid->token, token, NGTCP2_STATELESS_RESET_TOKENLEN); + dcid->flags = NGTCP2_DCID_FLAG_TOKEN_PRESENT; + } else { + dcid->flags = NGTCP2_DCID_FLAG_NONE; + } + ngtcp2_path_storage_zero(&dcid->ps); + dcid->retired_ts = UINT64_MAX; + dcid->bound_ts = UINT64_MAX; + dcid->bytes_sent = 0; + dcid->bytes_recv = 0; + dcid->max_udp_payload_size = NGTCP2_MAX_UDP_PAYLOAD_SIZE; +} + +void ngtcp2_dcid_set_token(ngtcp2_dcid *dcid, const uint8_t *token) { + assert(token); + + dcid->flags |= NGTCP2_DCID_FLAG_TOKEN_PRESENT; + memcpy(dcid->token, token, NGTCP2_STATELESS_RESET_TOKENLEN); +} + +void ngtcp2_dcid_set_path(ngtcp2_dcid *dcid, const ngtcp2_path *path) { + ngtcp2_path_copy(&dcid->ps.path, path); +} + +void ngtcp2_dcid_copy(ngtcp2_dcid *dest, const ngtcp2_dcid *src) { + ngtcp2_dcid_init(dest, src->seq, &src->cid, + (src->flags & NGTCP2_DCID_FLAG_TOKEN_PRESENT) ? src->token + : NULL); + ngtcp2_path_copy(&dest->ps.path, &src->ps.path); + dest->retired_ts = src->retired_ts; + dest->bound_ts = src->bound_ts; + dest->flags = src->flags; + dest->bytes_sent = src->bytes_sent; + dest->bytes_recv = src->bytes_recv; + dest->max_udp_payload_size = src->max_udp_payload_size; +} + +void ngtcp2_dcid_copy_cid_token(ngtcp2_dcid *dest, const ngtcp2_dcid *src) { + dest->seq = src->seq; + dest->cid = src->cid; + if (src->flags & NGTCP2_DCID_FLAG_TOKEN_PRESENT) { + dest->flags |= NGTCP2_DCID_FLAG_TOKEN_PRESENT; + memcpy(dest->token, src->token, NGTCP2_STATELESS_RESET_TOKENLEN); + } else if (dest->flags & NGTCP2_DCID_FLAG_TOKEN_PRESENT) { + dest->flags &= (uint8_t)~NGTCP2_DCID_FLAG_TOKEN_PRESENT; + } +} + +int ngtcp2_dcid_verify_uniqueness(ngtcp2_dcid *dcid, uint64_t seq, + const ngtcp2_cid *cid, const uint8_t *token) { + if (dcid->seq == seq) { + return ngtcp2_cid_eq(&dcid->cid, cid) && + (dcid->flags & NGTCP2_DCID_FLAG_TOKEN_PRESENT) && + memcmp(dcid->token, token, + NGTCP2_STATELESS_RESET_TOKENLEN) == 0 + ? 0 + : NGTCP2_ERR_PROTO; + } + + return !ngtcp2_cid_eq(&dcid->cid, cid) ? 0 : NGTCP2_ERR_PROTO; +} + +int ngtcp2_dcid_verify_stateless_reset_token(const ngtcp2_dcid *dcid, + const uint8_t *token) { + return (dcid->flags & NGTCP2_DCID_FLAG_TOKEN_PRESENT) && + ngtcp2_cmemeq(dcid->token, token, + NGTCP2_STATELESS_RESET_TOKENLEN) + ? 0 + : NGTCP2_ERR_INVALID_ARGUMENT; +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_cid.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_cid.h new file mode 100644 index 0000000..0b37441 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_cid.h @@ -0,0 +1,175 @@ +/* + * ngtcp2 + * + * Copyright (c) 2018 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_CID_H +#define NGTCP2_CID_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +#include "ngtcp2_pq.h" +#include "ngtcp2_path.h" + +/* NGTCP2_SCID_FLAG_NONE indicates that no flag is set. */ +#define NGTCP2_SCID_FLAG_NONE 0x00u +/* NGTCP2_SCID_FLAG_USED indicates that a local endpoint observed that + a remote endpoint uses a particular Connection ID. */ +#define NGTCP2_SCID_FLAG_USED 0x01u +/* NGTCP2_SCID_FLAG_RETIRED indicates that a particular Connection ID + is retired. */ +#define NGTCP2_SCID_FLAG_RETIRED 0x02u + +typedef struct ngtcp2_scid { + ngtcp2_pq_entry pe; + /* seq is the sequence number associated to the CID. */ + uint64_t seq; + /* cid is a connection ID */ + ngtcp2_cid cid; + /* retired_ts is the timestamp when peer tells that this CID is + retired. */ + ngtcp2_tstamp retired_ts; + /* flags is the bitwise OR of zero or more of NGTCP2_SCID_FLAG_*. */ + uint8_t flags; +} ngtcp2_scid; + +/* NGTCP2_DCID_FLAG_NONE indicates that no flag is set. */ +#define NGTCP2_DCID_FLAG_NONE 0x00u +/* NGTCP2_DCID_FLAG_PATH_VALIDATED indicates that an associated path + has been validated. */ +#define NGTCP2_DCID_FLAG_PATH_VALIDATED 0x01u +/* NGTCP2_DCID_FLAG_TOKEN_PRESENT indicates that a stateless reset + token is set in token field. */ +#define NGTCP2_DCID_FLAG_TOKEN_PRESENT 0x02u + +typedef struct ngtcp2_dcid { + /* seq is the sequence number associated to the CID. */ + uint64_t seq; + /* cid is a connection ID */ + ngtcp2_cid cid; + /* path is a path which cid is bound to. The addresses are zero + length if cid has not been bound to a particular path yet. */ + ngtcp2_path_storage ps; + /* retired_ts is the timestamp when peer tells that this CID is + retired. */ + ngtcp2_tstamp retired_ts; + /* bound_ts is the timestamp when this connection ID is bound to a + particular path. It is only assigned when a connection ID is + used just for sending PATH_RESPONSE and is not zero-length. */ + ngtcp2_tstamp bound_ts; + /* bytes_sent is the number of bytes sent to an associated path. */ + uint64_t bytes_sent; + /* bytes_recv is the number of bytes received from an associated + path. */ + uint64_t bytes_recv; + /* max_udp_payload_size is the maximum size of UDP payload that is + allowed to send to this path. */ + size_t max_udp_payload_size; + /* flags is bitwise OR of zero or more of NGTCP2_DCID_FLAG_*. */ + uint8_t flags; + /* token is a stateless reset token associated to this CID. + Actually, the stateless reset token is tied to the connection, + not to the particular connection ID. */ + uint8_t token[NGTCP2_STATELESS_RESET_TOKENLEN]; +} ngtcp2_dcid; + +/* ngtcp2_cid_zero makes |cid| zero-length. */ +void ngtcp2_cid_zero(ngtcp2_cid *cid); + +/* + * ngtcp2_cid_less returns nonzero if |lhs| is lexicographical smaller + * than |rhs|. + */ +int ngtcp2_cid_less(const ngtcp2_cid *lhs, const ngtcp2_cid *rhs); + +/* + * ngtcp2_cid_empty returns nonzero if |cid| includes empty connection + * ID. + */ +int ngtcp2_cid_empty(const ngtcp2_cid *cid); + +/* + * ngtcp2_scid_init initializes |scid| with the given parameters. + */ +void ngtcp2_scid_init(ngtcp2_scid *scid, uint64_t seq, const ngtcp2_cid *cid); + +/* + * ngtcp2_scid_copy copies |src| into |dest|. + */ +void ngtcp2_scid_copy(ngtcp2_scid *dest, const ngtcp2_scid *src); + +/* + * ngtcp2_dcid_init initializes |dcid| with the given parameters. If + * |token| is NULL, the function fills dcid->token it with 0. |token| + * must be NGTCP2_STATELESS_RESET_TOKENLEN bytes long. + */ +void ngtcp2_dcid_init(ngtcp2_dcid *dcid, uint64_t seq, const ngtcp2_cid *cid, + const uint8_t *token); + +/* + * ngtcp2_dcid_set_token sets |token| to |dcid|. |token| must not be + * NULL and must be NGTCP2_STATELESS_RESET_TOKENLEN bytes long. + */ +void ngtcp2_dcid_set_token(ngtcp2_dcid *dcid, const uint8_t *token); + +/* + * ngtcp2_dcid_set_path sets |path| to |dcid|. It sets + * max_udp_payload_size to the minimum UDP payload size supported + * by the IP protocol version. + */ +void ngtcp2_dcid_set_path(ngtcp2_dcid *dcid, const ngtcp2_path *path); + +/* + * ngtcp2_dcid_copy copies |src| into |dest|. + */ +void ngtcp2_dcid_copy(ngtcp2_dcid *dest, const ngtcp2_dcid *src); + +/* + * ngtcp2_dcid_copy_cid_token behaves like ngtcp2_dcid_copy, but it + * only copies cid, seq, and path. + */ +void ngtcp2_dcid_copy_cid_token(ngtcp2_dcid *dest, const ngtcp2_dcid *src); + +/* + * ngtcp2_dcid_verify_uniqueness verifies uniqueness of (|seq|, |cid|, + * |token|) tuple against |dcid|. + */ +int ngtcp2_dcid_verify_uniqueness(ngtcp2_dcid *dcid, uint64_t seq, + const ngtcp2_cid *cid, const uint8_t *token); + +/* + * ngtcp2_dcid_verify_stateless_reset_token verifies stateless reset + * token |token| against the one included in |dcid|. This function + * returns 0 if the verification succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_INVALID_ARGUMENT + * Tokens do not match; or |dcid| does not contain a token. + */ +int ngtcp2_dcid_verify_stateless_reset_token(const ngtcp2_dcid *dcid, + const uint8_t *token); + +#endif /* NGTCP2_CID_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_conn.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_conn.c new file mode 100644 index 0000000..fb00ca8 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_conn.c @@ -0,0 +1,13698 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_conn.h" + +#include <string.h> +#include <assert.h> + +#include "ngtcp2_macro.h" +#include "ngtcp2_log.h" +#include "ngtcp2_cid.h" +#include "ngtcp2_conv.h" +#include "ngtcp2_vec.h" +#include "ngtcp2_addr.h" +#include "ngtcp2_path.h" +#include "ngtcp2_rcvry.h" +#include "ngtcp2_unreachable.h" +#include "ngtcp2_net.h" + +/* NGTCP2_FLOW_WINDOW_RTT_FACTOR is the factor of RTT when flow + control window auto-tuning is triggered. */ +#define NGTCP2_FLOW_WINDOW_RTT_FACTOR 2 +/* NGTCP2_FLOW_WINDOW_SCALING_FACTOR is the growth factor of flow + control window. */ +#define NGTCP2_FLOW_WINDOW_SCALING_FACTOR 2 +/* NGTCP2_MIN_COALESCED_PAYLOADLEN is the minimum length of QUIC + packet payload that should be coalesced to a long packet. */ +#define NGTCP2_MIN_COALESCED_PAYLOADLEN 128 + +/* + * conn_local_stream returns nonzero if |stream_id| indicates that it + * is the stream initiated by local endpoint. + */ +static int conn_local_stream(ngtcp2_conn *conn, int64_t stream_id) { + return (uint8_t)(stream_id & 1) == conn->server; +} + +/* + * bidi_stream returns nonzero if |stream_id| is a bidirectional + * stream ID. + */ +static int bidi_stream(int64_t stream_id) { return (stream_id & 0x2) == 0; } + +/* + * conn_is_handshake_completed returns nonzero if QUIC handshake has + * completed. + */ +static int conn_is_handshake_completed(ngtcp2_conn *conn) { + return (conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_COMPLETED) && + conn->pktns.crypto.rx.ckm && conn->pktns.crypto.tx.ckm; +} + +static int conn_call_recv_client_initial(ngtcp2_conn *conn, + const ngtcp2_cid *dcid) { + int rv; + + assert(conn->callbacks.recv_client_initial); + + rv = conn->callbacks.recv_client_initial(conn, dcid, conn->user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +static int conn_call_handshake_completed(ngtcp2_conn *conn) { + int rv; + + if (!conn->callbacks.handshake_completed) { + return 0; + } + + rv = conn->callbacks.handshake_completed(conn, conn->user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +static int conn_call_recv_stream_data(ngtcp2_conn *conn, ngtcp2_strm *strm, + uint32_t flags, uint64_t offset, + const uint8_t *data, size_t datalen) { + int rv; + + if (!conn->callbacks.recv_stream_data) { + return 0; + } + + rv = conn->callbacks.recv_stream_data(conn, flags, strm->stream_id, offset, + data, datalen, conn->user_data, + strm->stream_user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +static int conn_call_recv_crypto_data(ngtcp2_conn *conn, + ngtcp2_crypto_level crypto_level, + uint64_t offset, const uint8_t *data, + size_t datalen) { + int rv; + + assert(conn->callbacks.recv_crypto_data); + + rv = conn->callbacks.recv_crypto_data(conn, crypto_level, offset, data, + datalen, conn->user_data); + switch (rv) { + case 0: + case NGTCP2_ERR_CRYPTO: + case NGTCP2_ERR_REQUIRED_TRANSPORT_PARAM: + case NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM: + case NGTCP2_ERR_TRANSPORT_PARAM: + case NGTCP2_ERR_PROTO: + case NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE: + case NGTCP2_ERR_NOMEM: + case NGTCP2_ERR_CALLBACK_FAILURE: + return rv; + default: + return NGTCP2_ERR_CALLBACK_FAILURE; + } +} + +static int conn_call_stream_open(ngtcp2_conn *conn, ngtcp2_strm *strm) { + int rv; + + if (!conn->callbacks.stream_open) { + return 0; + } + + rv = conn->callbacks.stream_open(conn, strm->stream_id, conn->user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +static int conn_call_stream_close(ngtcp2_conn *conn, ngtcp2_strm *strm) { + int rv; + uint32_t flags = NGTCP2_STREAM_CLOSE_FLAG_NONE; + + if (!conn->callbacks.stream_close) { + return 0; + } + + if (strm->flags & NGTCP2_STRM_FLAG_APP_ERROR_CODE_SET) { + flags |= NGTCP2_STREAM_CLOSE_FLAG_APP_ERROR_CODE_SET; + } + + rv = conn->callbacks.stream_close(conn, flags, strm->stream_id, + strm->app_error_code, conn->user_data, + strm->stream_user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +static int conn_call_stream_reset(ngtcp2_conn *conn, int64_t stream_id, + uint64_t final_size, uint64_t app_error_code, + void *stream_user_data) { + int rv; + + if (!conn->callbacks.stream_reset) { + return 0; + } + + rv = conn->callbacks.stream_reset(conn, stream_id, final_size, app_error_code, + conn->user_data, stream_user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +static int conn_call_extend_max_local_streams_bidi(ngtcp2_conn *conn, + uint64_t max_streams) { + int rv; + + if (!conn->callbacks.extend_max_local_streams_bidi) { + return 0; + } + + rv = conn->callbacks.extend_max_local_streams_bidi(conn, max_streams, + conn->user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +static int conn_call_extend_max_local_streams_uni(ngtcp2_conn *conn, + uint64_t max_streams) { + int rv; + + if (!conn->callbacks.extend_max_local_streams_uni) { + return 0; + } + + rv = conn->callbacks.extend_max_local_streams_uni(conn, max_streams, + conn->user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +static int conn_call_get_new_connection_id(ngtcp2_conn *conn, ngtcp2_cid *cid, + uint8_t *token, size_t cidlen) { + int rv; + + assert(conn->callbacks.get_new_connection_id); + + rv = conn->callbacks.get_new_connection_id(conn, cid, token, cidlen, + conn->user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +static int conn_call_remove_connection_id(ngtcp2_conn *conn, + const ngtcp2_cid *cid) { + int rv; + + if (!conn->callbacks.remove_connection_id) { + return 0; + } + + rv = conn->callbacks.remove_connection_id(conn, cid, conn->user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +static int conn_call_path_validation(ngtcp2_conn *conn, const ngtcp2_pv *pv, + ngtcp2_path_validation_result res) { + int rv; + uint32_t flags = NGTCP2_PATH_VALIDATION_FLAG_NONE; + + if (!conn->callbacks.path_validation) { + return 0; + } + + if (pv->flags & NGTCP2_PV_FLAG_PREFERRED_ADDR) { + flags |= NGTCP2_PATH_VALIDATION_FLAG_PREFERRED_ADDR; + } + + rv = conn->callbacks.path_validation(conn, flags, &pv->dcid.ps.path, res, + conn->user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +static int conn_call_select_preferred_addr(ngtcp2_conn *conn, + ngtcp2_path *dest) { + int rv; + + if (!conn->callbacks.select_preferred_addr) { + return 0; + } + + assert(conn->remote.transport_params); + assert(conn->remote.transport_params->preferred_address_present); + + rv = conn->callbacks.select_preferred_addr( + conn, dest, &conn->remote.transport_params->preferred_address, + conn->user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +static int conn_call_extend_max_remote_streams_bidi(ngtcp2_conn *conn, + uint64_t max_streams) { + int rv; + + if (!conn->callbacks.extend_max_remote_streams_bidi) { + return 0; + } + + rv = conn->callbacks.extend_max_remote_streams_bidi(conn, max_streams, + conn->user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +static int conn_call_extend_max_remote_streams_uni(ngtcp2_conn *conn, + uint64_t max_streams) { + int rv; + + if (!conn->callbacks.extend_max_remote_streams_uni) { + return 0; + } + + rv = conn->callbacks.extend_max_remote_streams_uni(conn, max_streams, + conn->user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +static int conn_call_extend_max_stream_data(ngtcp2_conn *conn, + ngtcp2_strm *strm, + int64_t stream_id, + uint64_t datalen) { + int rv; + + if (!conn->callbacks.extend_max_stream_data) { + return 0; + } + + rv = conn->callbacks.extend_max_stream_data( + conn, stream_id, datalen, conn->user_data, strm->stream_user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +static int conn_call_dcid_status(ngtcp2_conn *conn, + ngtcp2_connection_id_status_type type, + const ngtcp2_dcid *dcid) { + int rv; + + if (!conn->callbacks.dcid_status) { + return 0; + } + + rv = conn->callbacks.dcid_status( + conn, (int)type, dcid->seq, &dcid->cid, + (dcid->flags & NGTCP2_DCID_FLAG_TOKEN_PRESENT) ? dcid->token : NULL, + conn->user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +static int conn_call_activate_dcid(ngtcp2_conn *conn, const ngtcp2_dcid *dcid) { + return conn_call_dcid_status(conn, NGTCP2_CONNECTION_ID_STATUS_TYPE_ACTIVATE, + dcid); +} + +static int conn_call_deactivate_dcid(ngtcp2_conn *conn, + const ngtcp2_dcid *dcid) { + return conn_call_dcid_status( + conn, NGTCP2_CONNECTION_ID_STATUS_TYPE_DEACTIVATE, dcid); +} + +static int conn_call_stream_stop_sending(ngtcp2_conn *conn, int64_t stream_id, + uint64_t app_error_code, + void *stream_user_data) { + int rv; + + if (!conn->callbacks.stream_stop_sending) { + return 0; + } + + rv = conn->callbacks.stream_stop_sending(conn, stream_id, app_error_code, + conn->user_data, stream_user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +static void conn_call_delete_crypto_aead_ctx(ngtcp2_conn *conn, + ngtcp2_crypto_aead_ctx *aead_ctx) { + if (!aead_ctx->native_handle) { + return; + } + + assert(conn->callbacks.delete_crypto_aead_ctx); + + conn->callbacks.delete_crypto_aead_ctx(conn, aead_ctx, conn->user_data); +} + +static void +conn_call_delete_crypto_cipher_ctx(ngtcp2_conn *conn, + ngtcp2_crypto_cipher_ctx *cipher_ctx) { + if (!cipher_ctx->native_handle) { + return; + } + + assert(conn->callbacks.delete_crypto_cipher_ctx); + + conn->callbacks.delete_crypto_cipher_ctx(conn, cipher_ctx, conn->user_data); +} + +static int conn_call_client_initial(ngtcp2_conn *conn) { + int rv; + + assert(conn->callbacks.client_initial); + + rv = conn->callbacks.client_initial(conn, conn->user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +static int conn_call_get_path_challenge_data(ngtcp2_conn *conn, uint8_t *data) { + int rv; + + assert(conn->callbacks.get_path_challenge_data); + + rv = conn->callbacks.get_path_challenge_data(conn, data, conn->user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +static int conn_call_recv_version_negotiation(ngtcp2_conn *conn, + const ngtcp2_pkt_hd *hd, + const uint32_t *sv, size_t nsv) { + int rv; + + if (!conn->callbacks.recv_version_negotiation) { + return 0; + } + + rv = conn->callbacks.recv_version_negotiation(conn, hd, sv, nsv, + conn->user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +static int conn_call_recv_retry(ngtcp2_conn *conn, const ngtcp2_pkt_hd *hd) { + int rv; + + assert(conn->callbacks.recv_retry); + + rv = conn->callbacks.recv_retry(conn, hd, conn->user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +static int +conn_call_recv_stateless_reset(ngtcp2_conn *conn, + const ngtcp2_pkt_stateless_reset *sr) { + int rv; + + if (!conn->callbacks.recv_stateless_reset) { + return 0; + } + + rv = conn->callbacks.recv_stateless_reset(conn, sr, conn->user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +static int conn_call_recv_new_token(ngtcp2_conn *conn, const uint8_t *token, + size_t tokenlen) { + int rv; + + if (!conn->callbacks.recv_new_token) { + return 0; + } + + rv = conn->callbacks.recv_new_token(conn, token, tokenlen, conn->user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +static int conn_call_handshake_confirmed(ngtcp2_conn *conn) { + int rv; + + if (!conn->callbacks.handshake_confirmed) { + return 0; + } + + rv = conn->callbacks.handshake_confirmed(conn, conn->user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +static int conn_call_recv_datagram(ngtcp2_conn *conn, + const ngtcp2_datagram *fr) { + const uint8_t *data; + size_t datalen; + int rv; + uint32_t flags = NGTCP2_DATAGRAM_FLAG_NONE; + + if (!conn->callbacks.recv_datagram) { + return 0; + } + + if (fr->datacnt) { + assert(fr->datacnt == 1); + + data = fr->data->base; + datalen = fr->data->len; + } else { + data = NULL; + datalen = 0; + } + + if (!conn_is_handshake_completed(conn)) { + flags |= NGTCP2_DATAGRAM_FLAG_EARLY; + } + + rv = conn->callbacks.recv_datagram(conn, flags, data, datalen, + conn->user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +static int +conn_call_update_key(ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret, + ngtcp2_crypto_aead_ctx *rx_aead_ctx, uint8_t *rx_iv, + ngtcp2_crypto_aead_ctx *tx_aead_ctx, uint8_t *tx_iv, + const uint8_t *current_rx_secret, + const uint8_t *current_tx_secret, size_t secretlen) { + int rv; + + assert(conn->callbacks.update_key); + + rv = conn->callbacks.update_key( + conn, rx_secret, tx_secret, rx_aead_ctx, rx_iv, tx_aead_ctx, tx_iv, + current_rx_secret, current_tx_secret, secretlen, conn->user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +static int conn_call_version_negotiation(ngtcp2_conn *conn, uint32_t version, + const ngtcp2_cid *dcid) { + int rv; + + assert(conn->callbacks.version_negotiation); + + rv = + conn->callbacks.version_negotiation(conn, version, dcid, conn->user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +static int conn_call_recv_rx_key(ngtcp2_conn *conn, ngtcp2_crypto_level level) { + int rv; + + if (!conn->callbacks.recv_rx_key) { + return 0; + } + + rv = conn->callbacks.recv_rx_key(conn, level, conn->user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +static int conn_call_recv_tx_key(ngtcp2_conn *conn, ngtcp2_crypto_level level) { + int rv; + + if (!conn->callbacks.recv_tx_key) { + return 0; + } + + rv = conn->callbacks.recv_tx_key(conn, level, conn->user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +static int crypto_offset_less(const ngtcp2_ksl_key *lhs, + const ngtcp2_ksl_key *rhs) { + return *(int64_t *)lhs < *(int64_t *)rhs; +} + +static int pktns_init(ngtcp2_pktns *pktns, ngtcp2_pktns_id pktns_id, + ngtcp2_rst *rst, ngtcp2_cc *cc, ngtcp2_log *log, + ngtcp2_qlog *qlog, ngtcp2_objalloc *rtb_entry_objalloc, + ngtcp2_objalloc *frc_objalloc, const ngtcp2_mem *mem) { + int rv; + + memset(pktns, 0, sizeof(*pktns)); + + ngtcp2_gaptr_init(&pktns->rx.pngap, mem); + + pktns->tx.last_pkt_num = -1; + pktns->rx.max_pkt_num = -1; + pktns->rx.max_ack_eliciting_pkt_num = -1; + + rv = ngtcp2_acktr_init(&pktns->acktr, log, mem); + if (rv != 0) { + goto fail_acktr_init; + } + + ngtcp2_strm_init(&pktns->crypto.strm, 0, NGTCP2_STRM_FLAG_NONE, 0, 0, NULL, + NULL, mem); + + ngtcp2_ksl_init(&pktns->crypto.tx.frq, crypto_offset_less, sizeof(uint64_t), + mem); + + ngtcp2_rtb_init(&pktns->rtb, pktns_id, &pktns->crypto.strm, rst, cc, log, + qlog, rtb_entry_objalloc, frc_objalloc, mem); + + return 0; + +fail_acktr_init: + ngtcp2_gaptr_free(&pktns->rx.pngap); + + return rv; +} + +static int pktns_new(ngtcp2_pktns **ppktns, ngtcp2_pktns_id pktns_id, + ngtcp2_rst *rst, ngtcp2_cc *cc, ngtcp2_log *log, + ngtcp2_qlog *qlog, ngtcp2_objalloc *rtb_entry_objalloc, + ngtcp2_objalloc *frc_objalloc, const ngtcp2_mem *mem) { + int rv; + + *ppktns = ngtcp2_mem_malloc(mem, sizeof(ngtcp2_pktns)); + if (*ppktns == NULL) { + return NGTCP2_ERR_NOMEM; + } + + rv = pktns_init(*ppktns, pktns_id, rst, cc, log, qlog, rtb_entry_objalloc, + frc_objalloc, mem); + if (rv != 0) { + ngtcp2_mem_free(mem, *ppktns); + } + + return rv; +} + +static int cycle_less(const ngtcp2_pq_entry *lhs, const ngtcp2_pq_entry *rhs) { + ngtcp2_strm *ls = ngtcp2_struct_of(lhs, ngtcp2_strm, pe); + ngtcp2_strm *rs = ngtcp2_struct_of(rhs, ngtcp2_strm, pe); + + if (ls->cycle == rs->cycle) { + return ls->stream_id < rs->stream_id; + } + + return rs->cycle - ls->cycle <= 1; +} + +static void delete_buffed_pkts(ngtcp2_pkt_chain *pc, const ngtcp2_mem *mem) { + ngtcp2_pkt_chain *next; + + for (; pc;) { + next = pc->next; + ngtcp2_pkt_chain_del(pc, mem); + pc = next; + } +} + +static void delete_buf_chain(ngtcp2_buf_chain *bufchain, + const ngtcp2_mem *mem) { + ngtcp2_buf_chain *next; + + for (; bufchain;) { + next = bufchain->next; + ngtcp2_buf_chain_del(bufchain, mem); + bufchain = next; + } +} + +static void pktns_free(ngtcp2_pktns *pktns, const ngtcp2_mem *mem) { + ngtcp2_frame_chain *frc; + ngtcp2_ksl_it it; + + delete_buf_chain(pktns->crypto.tx.data, mem); + + delete_buffed_pkts(pktns->rx.buffed_pkts, mem); + + ngtcp2_frame_chain_list_objalloc_del(pktns->tx.frq, pktns->rtb.frc_objalloc, + mem); + + ngtcp2_crypto_km_del(pktns->crypto.rx.ckm, mem); + ngtcp2_crypto_km_del(pktns->crypto.tx.ckm, mem); + + for (it = ngtcp2_ksl_begin(&pktns->crypto.tx.frq); !ngtcp2_ksl_it_end(&it); + ngtcp2_ksl_it_next(&it)) { + frc = ngtcp2_ksl_it_get(&it); + ngtcp2_frame_chain_objalloc_del(frc, pktns->rtb.frc_objalloc, mem); + } + + ngtcp2_ksl_free(&pktns->crypto.tx.frq); + ngtcp2_rtb_free(&pktns->rtb); + ngtcp2_strm_free(&pktns->crypto.strm); + ngtcp2_acktr_free(&pktns->acktr); + ngtcp2_gaptr_free(&pktns->rx.pngap); +} + +static void pktns_del(ngtcp2_pktns *pktns, const ngtcp2_mem *mem) { + if (pktns == NULL) { + return; + } + + pktns_free(pktns, mem); + + ngtcp2_mem_free(mem, pktns); +} + +static void cc_del(ngtcp2_cc *cc, ngtcp2_cc_algo cc_algo, + const ngtcp2_mem *mem) { + switch (cc_algo) { + case NGTCP2_CC_ALGO_RENO: + ngtcp2_cc_reno_cc_free(cc, mem); + break; + case NGTCP2_CC_ALGO_CUBIC: + ngtcp2_cc_cubic_cc_free(cc, mem); + break; + case NGTCP2_CC_ALGO_BBR: + ngtcp2_cc_bbr_cc_free(cc, mem); + break; + case NGTCP2_CC_ALGO_BBR2: + ngtcp2_cc_bbr2_cc_free(cc, mem); + break; + default: + break; + } +} + +static int cid_less(const ngtcp2_ksl_key *lhs, const ngtcp2_ksl_key *rhs) { + return ngtcp2_cid_less(lhs, rhs); +} + +static int retired_ts_less(const ngtcp2_pq_entry *lhs, + const ngtcp2_pq_entry *rhs) { + const ngtcp2_scid *a = ngtcp2_struct_of(lhs, ngtcp2_scid, pe); + const ngtcp2_scid *b = ngtcp2_struct_of(rhs, ngtcp2_scid, pe); + + return a->retired_ts < b->retired_ts; +} + +/* + * conn_reset_conn_stat_cc resets congestion state in |cstat|. + */ +static void conn_reset_conn_stat_cc(ngtcp2_conn *conn, + ngtcp2_conn_stat *cstat) { + cstat->latest_rtt = 0; + cstat->min_rtt = UINT64_MAX; + cstat->smoothed_rtt = conn->local.settings.initial_rtt; + cstat->rttvar = conn->local.settings.initial_rtt / 2; + cstat->first_rtt_sample_ts = UINT64_MAX; + cstat->pto_count = 0; + cstat->loss_detection_timer = UINT64_MAX; + cstat->cwnd = + ngtcp2_cc_compute_initcwnd(conn->local.settings.max_tx_udp_payload_size); + cstat->ssthresh = UINT64_MAX; + cstat->congestion_recovery_start_ts = UINT64_MAX; + cstat->bytes_in_flight = 0; + cstat->delivery_rate_sec = 0; + cstat->pacing_rate = 0.0; + cstat->send_quantum = 64 * 1024; +} + +/* + * reset_conn_stat_recovery resets the fields related to the recovery + * function + */ +static void reset_conn_stat_recovery(ngtcp2_conn_stat *cstat) { + /* Initializes them with UINT64_MAX. */ + memset(cstat->loss_time, 0xff, sizeof(cstat->loss_time)); + memset(cstat->last_tx_pkt_ts, 0xff, sizeof(cstat->last_tx_pkt_ts)); +} + +/* + * conn_reset_conn_stat resets |cstat|. The following fields are not + * reset: initial_rtt and max_udp_payload_size. + */ +static void conn_reset_conn_stat(ngtcp2_conn *conn, ngtcp2_conn_stat *cstat) { + conn_reset_conn_stat_cc(conn, cstat); + reset_conn_stat_recovery(cstat); +} + +static void delete_scid(ngtcp2_ksl *scids, const ngtcp2_mem *mem) { + ngtcp2_ksl_it it; + + for (it = ngtcp2_ksl_begin(scids); !ngtcp2_ksl_it_end(&it); + ngtcp2_ksl_it_next(&it)) { + ngtcp2_mem_free(mem, ngtcp2_ksl_it_get(&it)); + } +} + +/* + * compute_pto computes PTO. + */ +static ngtcp2_duration compute_pto(ngtcp2_duration smoothed_rtt, + ngtcp2_duration rttvar, + ngtcp2_duration max_ack_delay) { + ngtcp2_duration var = ngtcp2_max(4 * rttvar, NGTCP2_GRANULARITY); + return smoothed_rtt + var + max_ack_delay; +} + +/* + * conn_compute_initial_pto computes PTO using the initial RTT. + */ +static ngtcp2_duration conn_compute_initial_pto(ngtcp2_conn *conn, + ngtcp2_pktns *pktns) { + ngtcp2_duration initial_rtt = conn->local.settings.initial_rtt; + ngtcp2_duration max_ack_delay; + + if (pktns->rtb.pktns_id == NGTCP2_PKTNS_ID_APPLICATION && + conn->remote.transport_params) { + max_ack_delay = conn->remote.transport_params->max_ack_delay; + } else { + max_ack_delay = 0; + } + return compute_pto(initial_rtt, initial_rtt / 2, max_ack_delay); +} + +/* + * conn_compute_pto computes the current PTO. + */ +static ngtcp2_duration conn_compute_pto(ngtcp2_conn *conn, + ngtcp2_pktns *pktns) { + ngtcp2_conn_stat *cstat = &conn->cstat; + ngtcp2_duration max_ack_delay; + + if (pktns->rtb.pktns_id == NGTCP2_PKTNS_ID_APPLICATION && + conn->remote.transport_params) { + max_ack_delay = conn->remote.transport_params->max_ack_delay; + } else { + max_ack_delay = 0; + } + return compute_pto(cstat->smoothed_rtt, cstat->rttvar, max_ack_delay); +} + +ngtcp2_duration ngtcp2_conn_compute_pto(ngtcp2_conn *conn, + ngtcp2_pktns *pktns) { + return conn_compute_pto(conn, pktns); +} + +static void conn_handle_tx_ecn(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, + uint16_t *prtb_entry_flags, ngtcp2_pktns *pktns, + const ngtcp2_pkt_hd *hd, ngtcp2_tstamp ts) { + assert(pi); + + if (pi->ecn != NGTCP2_ECN_NOT_ECT) { + /* We have already made a transition of validation state and + deceided to send UDP datagram with ECN bit set. Coalesced QUIC + packets also bear ECN bits set. */ + if (pktns->tx.ecn.start_pkt_num == INT64_MAX) { + pktns->tx.ecn.start_pkt_num = hd->pkt_num; + } + + ++pktns->tx.ecn.validation_pkt_sent; + + if (prtb_entry_flags) { + *prtb_entry_flags |= NGTCP2_RTB_ENTRY_FLAG_ECN; + } + + ++pktns->tx.ecn.ect0; + + return; + } + + switch (conn->tx.ecn.state) { + case NGTCP2_ECN_STATE_TESTING: + if (conn->tx.ecn.validation_start_ts == UINT64_MAX) { + assert(0 == pktns->tx.ecn.validation_pkt_sent); + assert(0 == pktns->tx.ecn.validation_pkt_lost); + + conn->tx.ecn.validation_start_ts = ts; + } else if (ts - conn->tx.ecn.validation_start_ts >= + 3 * conn_compute_pto(conn, pktns)) { + conn->tx.ecn.state = NGTCP2_ECN_STATE_UNKNOWN; + break; + } + + if (pktns->tx.ecn.start_pkt_num == INT64_MAX) { + pktns->tx.ecn.start_pkt_num = hd->pkt_num; + } + + ++pktns->tx.ecn.validation_pkt_sent; + + if (++conn->tx.ecn.dgram_sent == NGTCP2_ECN_MAX_NUM_VALIDATION_PKTS) { + conn->tx.ecn.state = NGTCP2_ECN_STATE_UNKNOWN; + } + /* fall through */ + case NGTCP2_ECN_STATE_CAPABLE: + /* pi is provided per UDP datagram. */ + assert(NGTCP2_ECN_NOT_ECT == pi->ecn); + + pi->ecn = NGTCP2_ECN_ECT_0; + + if (prtb_entry_flags) { + *prtb_entry_flags |= NGTCP2_RTB_ENTRY_FLAG_ECN; + } + + ++pktns->tx.ecn.ect0; + break; + case NGTCP2_ECN_STATE_UNKNOWN: + case NGTCP2_ECN_STATE_FAILED: + break; + default: + ngtcp2_unreachable(); + } +} + +static void conn_reset_ecn_validation_state(ngtcp2_conn *conn) { + ngtcp2_pktns *in_pktns = conn->in_pktns; + ngtcp2_pktns *hs_pktns = conn->hs_pktns; + ngtcp2_pktns *pktns = &conn->pktns; + + conn->tx.ecn.state = NGTCP2_ECN_STATE_TESTING; + conn->tx.ecn.validation_start_ts = UINT64_MAX; + conn->tx.ecn.dgram_sent = 0; + + if (in_pktns) { + in_pktns->tx.ecn.start_pkt_num = INT64_MAX; + in_pktns->tx.ecn.validation_pkt_sent = 0; + in_pktns->tx.ecn.validation_pkt_lost = 0; + } + + if (hs_pktns) { + hs_pktns->tx.ecn.start_pkt_num = INT64_MAX; + hs_pktns->tx.ecn.validation_pkt_sent = 0; + hs_pktns->tx.ecn.validation_pkt_lost = 0; + } + + pktns->tx.ecn.start_pkt_num = INT64_MAX; + pktns->tx.ecn.validation_pkt_sent = 0; + pktns->tx.ecn.validation_pkt_lost = 0; +} + +/* server_default_available_versions is the default available_versions + field sent by server. */ +static uint8_t server_default_available_versions[] = {0, 0, 0, 1}; + +/* + * available_versions_new allocates new buffer, and writes |versions| + * of length |versionslen| in network byte order, suitable for sending + * in available_versions field of version_information QUIC transport + * parameter. The pointer to the allocated buffer is assigned to + * |*pbuf|. + * + * This function returns 0 if it succeeds, or one of the negative + * error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +static int available_versions_new(uint8_t **pbuf, const uint32_t *versions, + size_t versionslen, const ngtcp2_mem *mem) { + size_t i; + uint8_t *buf = ngtcp2_mem_malloc(mem, sizeof(uint32_t) * versionslen); + + if (buf == NULL) { + return NGTCP2_ERR_NOMEM; + } + + *pbuf = buf; + + for (i = 0; i < versionslen; ++i) { + buf = ngtcp2_put_uint32be(buf, versions[i]); + } + + return 0; +} + +static void +conn_set_local_transport_params(ngtcp2_conn *conn, + const ngtcp2_transport_params *params) { + ngtcp2_transport_params *p = &conn->local.transport_params; + uint32_t chosen_version = p->version_info.chosen_version; + + *p = *params; + + /* grease_quic_bit is always enabled. */ + p->grease_quic_bit = 1; + + if (conn->server) { + p->version_info.chosen_version = chosen_version; + } else { + p->version_info.chosen_version = conn->client_chosen_version; + } + p->version_info.available_versions = conn->vneg.available_versions; + p->version_info.available_versionslen = conn->vneg.available_versionslen; + p->version_info_present = 1; +} + +static int conn_new(ngtcp2_conn **pconn, const ngtcp2_cid *dcid, + const ngtcp2_cid *scid, const ngtcp2_path *path, + uint32_t client_chosen_version, int callbacks_version, + const ngtcp2_callbacks *callbacks, int settings_version, + const ngtcp2_settings *settings, + int transport_params_version, + const ngtcp2_transport_params *params, + const ngtcp2_mem *mem, void *user_data, int server) { + int rv; + ngtcp2_scid *scident; + uint8_t *buf; + uint8_t fixed_bit_byte; + size_t i; + uint32_t *preferred_versions; + (void)callbacks_version; + (void)settings_version; + (void)transport_params_version; + + assert(settings->max_window <= NGTCP2_MAX_VARINT); + assert(settings->max_stream_window <= NGTCP2_MAX_VARINT); + assert(settings->max_tx_udp_payload_size); + assert(settings->max_tx_udp_payload_size <= NGTCP2_HARD_MAX_UDP_PAYLOAD_SIZE); + assert(params->active_connection_id_limit <= NGTCP2_MAX_DCID_POOL_SIZE); + assert(params->initial_max_data <= NGTCP2_MAX_VARINT); + assert(params->initial_max_stream_data_bidi_local <= NGTCP2_MAX_VARINT); + assert(params->initial_max_stream_data_bidi_remote <= NGTCP2_MAX_VARINT); + assert(params->initial_max_stream_data_uni <= NGTCP2_MAX_VARINT); + assert(server || callbacks->client_initial); + assert(!server || callbacks->recv_client_initial); + assert(callbacks->recv_crypto_data); + assert(callbacks->encrypt); + assert(callbacks->decrypt); + assert(callbacks->hp_mask); + assert(server || callbacks->recv_retry); + assert(callbacks->rand); + assert(callbacks->get_new_connection_id); + assert(callbacks->update_key); + assert(callbacks->delete_crypto_aead_ctx); + assert(callbacks->delete_crypto_cipher_ctx); + assert(callbacks->get_path_challenge_data); + assert(!server || !ngtcp2_is_reserved_version(client_chosen_version)); + + if (mem == NULL) { + mem = ngtcp2_mem_default(); + } + + *pconn = ngtcp2_mem_calloc(mem, 1, sizeof(ngtcp2_conn)); + if (*pconn == NULL) { + rv = NGTCP2_ERR_NOMEM; + goto fail_conn; + } + + ngtcp2_objalloc_frame_chain_init(&(*pconn)->frc_objalloc, 64, mem); + ngtcp2_objalloc_rtb_entry_init(&(*pconn)->rtb_entry_objalloc, 64, mem); + ngtcp2_objalloc_strm_init(&(*pconn)->strm_objalloc, 64, mem); + + ngtcp2_static_ringbuf_dcid_bound_init(&(*pconn)->dcid.bound); + + ngtcp2_static_ringbuf_dcid_unused_init(&(*pconn)->dcid.unused); + + ngtcp2_static_ringbuf_dcid_retired_init(&(*pconn)->dcid.retired); + + ngtcp2_gaptr_init(&(*pconn)->dcid.seqgap, mem); + + ngtcp2_ksl_init(&(*pconn)->scid.set, cid_less, sizeof(ngtcp2_cid), mem); + + ngtcp2_pq_init(&(*pconn)->scid.used, retired_ts_less, mem); + + ngtcp2_map_init(&(*pconn)->strms, mem); + + ngtcp2_pq_init(&(*pconn)->tx.strmq, cycle_less, mem); + + ngtcp2_idtr_init(&(*pconn)->remote.bidi.idtr, !server, mem); + + ngtcp2_idtr_init(&(*pconn)->remote.uni.idtr, !server, mem); + + ngtcp2_static_ringbuf_path_challenge_init(&(*pconn)->rx.path_challenge); + + ngtcp2_log_init(&(*pconn)->log, scid, settings->log_printf, + settings->initial_ts, user_data); + ngtcp2_qlog_init(&(*pconn)->qlog, settings->qlog.write, settings->initial_ts, + user_data); + if ((*pconn)->qlog.write) { + buf = ngtcp2_mem_malloc(mem, NGTCP2_QLOG_BUFLEN); + if (buf == NULL) { + rv = NGTCP2_ERR_NOMEM; + goto fail_qlog_buf; + } + ngtcp2_buf_init(&(*pconn)->qlog.buf, buf, NGTCP2_QLOG_BUFLEN); + } + + (*pconn)->local.settings = *settings; + + if (settings->tokenlen) { + buf = ngtcp2_mem_malloc(mem, settings->tokenlen); + if (buf == NULL) { + rv = NGTCP2_ERR_NOMEM; + goto fail_token; + } + memcpy(buf, settings->token, settings->tokenlen); + (*pconn)->local.settings.token = buf; + } else { + (*pconn)->local.settings.token = NULL; + } + + if (!(*pconn)->local.settings.original_version) { + (*pconn)->local.settings.original_version = client_chosen_version; + } + + conn_reset_conn_stat(*pconn, &(*pconn)->cstat); + (*pconn)->cstat.initial_rtt = settings->initial_rtt; + (*pconn)->cstat.max_tx_udp_payload_size = + (*pconn)->local.settings.max_tx_udp_payload_size; + + ngtcp2_rst_init(&(*pconn)->rst); + + (*pconn)->cc_algo = settings->cc_algo; + + switch (settings->cc_algo) { + case NGTCP2_CC_ALGO_RENO: + rv = ngtcp2_cc_reno_cc_init(&(*pconn)->cc, &(*pconn)->log, mem); + if (rv != 0) { + goto fail_cc_init; + } + break; + case NGTCP2_CC_ALGO_CUBIC: + rv = ngtcp2_cc_cubic_cc_init(&(*pconn)->cc, &(*pconn)->log, mem); + if (rv != 0) { + goto fail_cc_init; + } + break; + case NGTCP2_CC_ALGO_BBR: + rv = ngtcp2_cc_bbr_cc_init(&(*pconn)->cc, &(*pconn)->log, &(*pconn)->cstat, + &(*pconn)->rst, settings->initial_ts, + callbacks->rand, &settings->rand_ctx, mem); + if (rv != 0) { + goto fail_cc_init; + } + break; + case NGTCP2_CC_ALGO_BBR2: + rv = ngtcp2_cc_bbr2_cc_init(&(*pconn)->cc, &(*pconn)->log, &(*pconn)->cstat, + &(*pconn)->rst, settings->initial_ts, + callbacks->rand, &settings->rand_ctx, mem); + if (rv != 0) { + goto fail_cc_init; + } + break; + default: + ngtcp2_unreachable(); + } + + rv = pktns_new(&(*pconn)->in_pktns, NGTCP2_PKTNS_ID_INITIAL, &(*pconn)->rst, + &(*pconn)->cc, &(*pconn)->log, &(*pconn)->qlog, + &(*pconn)->rtb_entry_objalloc, &(*pconn)->frc_objalloc, mem); + if (rv != 0) { + goto fail_in_pktns_init; + } + + rv = pktns_new(&(*pconn)->hs_pktns, NGTCP2_PKTNS_ID_HANDSHAKE, &(*pconn)->rst, + &(*pconn)->cc, &(*pconn)->log, &(*pconn)->qlog, + &(*pconn)->rtb_entry_objalloc, &(*pconn)->frc_objalloc, mem); + if (rv != 0) { + goto fail_hs_pktns_init; + } + + rv = pktns_init(&(*pconn)->pktns, NGTCP2_PKTNS_ID_APPLICATION, &(*pconn)->rst, + &(*pconn)->cc, &(*pconn)->log, &(*pconn)->qlog, + &(*pconn)->rtb_entry_objalloc, &(*pconn)->frc_objalloc, mem); + if (rv != 0) { + goto fail_pktns_init; + } + + scident = ngtcp2_mem_malloc(mem, sizeof(*scident)); + if (scident == NULL) { + rv = NGTCP2_ERR_NOMEM; + goto fail_scident; + } + + /* Set stateless reset token later if it is available in the local + transport parameters */ + ngtcp2_scid_init(scident, 0, scid); + + rv = ngtcp2_ksl_insert(&(*pconn)->scid.set, NULL, &scident->cid, scident); + if (rv != 0) { + goto fail_scid_set_insert; + } + + scident = NULL; + + ngtcp2_dcid_init(&(*pconn)->dcid.current, 0, dcid, NULL); + ngtcp2_dcid_set_path(&(*pconn)->dcid.current, path); + + rv = ngtcp2_gaptr_push(&(*pconn)->dcid.seqgap, 0, 1); + if (rv != 0) { + goto fail_seqgap_push; + } + + if (settings->preferred_versionslen) { + if (!server && !ngtcp2_is_reserved_version(client_chosen_version)) { + for (i = 0; i < settings->preferred_versionslen; ++i) { + if (settings->preferred_versions[i] == client_chosen_version) { + break; + } + } + + assert(i < settings->preferred_versionslen); + } + + preferred_versions = ngtcp2_mem_malloc( + mem, sizeof(uint32_t) * settings->preferred_versionslen); + if (preferred_versions == NULL) { + rv = NGTCP2_ERR_NOMEM; + goto fail_preferred_versions; + } + + for (i = 0; i < settings->preferred_versionslen; ++i) { + assert(ngtcp2_is_supported_version(settings->preferred_versions[i])); + + preferred_versions[i] = settings->preferred_versions[i]; + } + + (*pconn)->vneg.preferred_versions = preferred_versions; + (*pconn)->vneg.preferred_versionslen = settings->preferred_versionslen; + } + + if (settings->available_versionslen) { + if (!server && !ngtcp2_is_reserved_version(client_chosen_version)) { + for (i = 0; i < settings->available_versionslen; ++i) { + if (settings->available_versions[i] == client_chosen_version) { + break; + } + } + + assert(i < settings->available_versionslen); + } + + for (i = 0; i < settings->available_versionslen; ++i) { + assert(ngtcp2_is_reserved_version(settings->available_versions[i]) || + ngtcp2_is_supported_version(settings->available_versions[i])); + } + + rv = available_versions_new(&buf, settings->available_versions, + settings->available_versionslen, mem); + if (rv != 0) { + goto fail_available_versions; + } + + (*pconn)->vneg.available_versions = buf; + (*pconn)->vneg.available_versionslen = + sizeof(uint32_t) * settings->available_versionslen; + } else if (server) { + if (settings->preferred_versionslen) { + rv = available_versions_new(&buf, settings->preferred_versions, + settings->preferred_versionslen, mem); + if (rv != 0) { + goto fail_available_versions; + } + + (*pconn)->vneg.available_versions = buf; + (*pconn)->vneg.available_versionslen = + sizeof(uint32_t) * settings->preferred_versionslen; + } else { + (*pconn)->vneg.available_versions = server_default_available_versions; + (*pconn)->vneg.available_versionslen = + sizeof(server_default_available_versions); + } + } else if (!server && !ngtcp2_is_reserved_version(client_chosen_version)) { + rv = available_versions_new(&buf, &client_chosen_version, 1, mem); + if (rv != 0) { + goto fail_available_versions; + } + + (*pconn)->vneg.available_versions = buf; + (*pconn)->vneg.available_versionslen = sizeof(uint32_t); + } + + (*pconn)->client_chosen_version = client_chosen_version; + + conn_set_local_transport_params(*pconn, params); + + callbacks->rand(&fixed_bit_byte, 1, &settings->rand_ctx); + if (fixed_bit_byte & 1) { + (*pconn)->flags |= NGTCP2_CONN_FLAG_CLEAR_FIXED_BIT; + } + + (*pconn)->keep_alive.last_ts = UINT64_MAX; + + (*pconn)->server = server; + (*pconn)->oscid = *scid; + (*pconn)->callbacks = *callbacks; + (*pconn)->mem = mem; + (*pconn)->user_data = user_data; + (*pconn)->idle_ts = settings->initial_ts; + (*pconn)->crypto.key_update.confirmed_ts = UINT64_MAX; + (*pconn)->tx.last_max_data_ts = UINT64_MAX; + (*pconn)->tx.pacing.next_ts = UINT64_MAX; + (*pconn)->early.discard_started_ts = UINT64_MAX; + + conn_reset_ecn_validation_state(*pconn); + + ngtcp2_qlog_start(&(*pconn)->qlog, server ? &settings->qlog.odcid : dcid, + server); + + return 0; + +fail_available_versions: + ngtcp2_mem_free(mem, (*pconn)->vneg.preferred_versions); +fail_preferred_versions: +fail_seqgap_push: +fail_scid_set_insert: + ngtcp2_mem_free(mem, scident); +fail_scident: + pktns_free(&(*pconn)->pktns, mem); +fail_pktns_init: + pktns_del((*pconn)->hs_pktns, mem); +fail_hs_pktns_init: + pktns_del((*pconn)->in_pktns, mem); +fail_in_pktns_init: + cc_del(&(*pconn)->cc, settings->cc_algo, mem); +fail_cc_init: + ngtcp2_mem_free(mem, (uint8_t *)(*pconn)->local.settings.token); +fail_token: + ngtcp2_mem_free(mem, (*pconn)->qlog.buf.begin); +fail_qlog_buf: + ngtcp2_idtr_free(&(*pconn)->remote.uni.idtr); + ngtcp2_idtr_free(&(*pconn)->remote.bidi.idtr); + ngtcp2_map_free(&(*pconn)->strms); + delete_scid(&(*pconn)->scid.set, mem); + ngtcp2_ksl_free(&(*pconn)->scid.set); + ngtcp2_gaptr_free(&(*pconn)->dcid.seqgap); + ngtcp2_objalloc_free(&(*pconn)->strm_objalloc); + ngtcp2_objalloc_free(&(*pconn)->rtb_entry_objalloc); + ngtcp2_objalloc_free(&(*pconn)->frc_objalloc); + ngtcp2_mem_free(mem, *pconn); +fail_conn: + return rv; +} + +int ngtcp2_conn_client_new_versioned( + ngtcp2_conn **pconn, const ngtcp2_cid *dcid, const ngtcp2_cid *scid, + const ngtcp2_path *path, uint32_t client_chosen_version, + int callbacks_version, const ngtcp2_callbacks *callbacks, + int settings_version, const ngtcp2_settings *settings, + int transport_params_version, const ngtcp2_transport_params *params, + const ngtcp2_mem *mem, void *user_data) { + int rv; + + rv = conn_new(pconn, dcid, scid, path, client_chosen_version, + callbacks_version, callbacks, settings_version, settings, + transport_params_version, params, mem, user_data, 0); + if (rv != 0) { + return rv; + } + (*pconn)->rcid = *dcid; + (*pconn)->state = NGTCP2_CS_CLIENT_INITIAL; + (*pconn)->local.bidi.next_stream_id = 0; + (*pconn)->local.uni.next_stream_id = 2; + + rv = ngtcp2_conn_commit_local_transport_params(*pconn); + if (rv != 0) { + ngtcp2_conn_del(*pconn); + return rv; + } + + return 0; +} + +int ngtcp2_conn_server_new_versioned( + ngtcp2_conn **pconn, const ngtcp2_cid *dcid, const ngtcp2_cid *scid, + const ngtcp2_path *path, uint32_t client_chosen_version, + int callbacks_version, const ngtcp2_callbacks *callbacks, + int settings_version, const ngtcp2_settings *settings, + int transport_params_version, const ngtcp2_transport_params *params, + const ngtcp2_mem *mem, void *user_data) { + int rv; + + rv = conn_new(pconn, dcid, scid, path, client_chosen_version, + callbacks_version, callbacks, settings_version, settings, + transport_params_version, params, mem, user_data, 1); + if (rv != 0) { + return rv; + } + + (*pconn)->state = NGTCP2_CS_SERVER_INITIAL; + (*pconn)->local.bidi.next_stream_id = 1; + (*pconn)->local.uni.next_stream_id = 3; + + if ((*pconn)->local.settings.tokenlen) { + /* Usage of token lifts amplification limit */ + (*pconn)->dcid.current.flags |= NGTCP2_DCID_FLAG_PATH_VALIDATED; + } + + return 0; +} + +/* + * conn_fc_credits returns the number of bytes allowed to be sent to + * the given stream. Both connection and stream level flow control + * credits are considered. + */ +static uint64_t conn_fc_credits(ngtcp2_conn *conn, ngtcp2_strm *strm) { + return ngtcp2_min(strm->tx.max_offset - strm->tx.offset, + conn->tx.max_offset - conn->tx.offset); +} + +/* + * conn_enforce_flow_control returns the number of bytes allowed to be + * sent to the given stream. |len| might be shorted because of + * available flow control credits. + */ +static uint64_t conn_enforce_flow_control(ngtcp2_conn *conn, ngtcp2_strm *strm, + uint64_t len) { + uint64_t fc_credits = conn_fc_credits(conn, strm); + return ngtcp2_min(len, fc_credits); +} + +static int delete_strms_each(void *data, void *ptr) { + ngtcp2_conn *conn = ptr; + ngtcp2_strm *s = data; + + ngtcp2_strm_free(s); + ngtcp2_objalloc_strm_release(&conn->strm_objalloc, s); + + return 0; +} + +static void conn_vneg_crypto_free(ngtcp2_conn *conn) { + if (conn->vneg.rx.ckm) { + conn_call_delete_crypto_aead_ctx(conn, &conn->vneg.rx.ckm->aead_ctx); + } + conn_call_delete_crypto_cipher_ctx(conn, &conn->vneg.rx.hp_ctx); + + if (conn->vneg.tx.ckm) { + conn_call_delete_crypto_aead_ctx(conn, &conn->vneg.tx.ckm->aead_ctx); + } + conn_call_delete_crypto_cipher_ctx(conn, &conn->vneg.tx.hp_ctx); + + ngtcp2_crypto_km_del(conn->vneg.rx.ckm, conn->mem); + ngtcp2_crypto_km_del(conn->vneg.tx.ckm, conn->mem); +} + +void ngtcp2_conn_del(ngtcp2_conn *conn) { + if (conn == NULL) { + return; + } + + ngtcp2_qlog_end(&conn->qlog); + + if (conn->early.ckm) { + conn_call_delete_crypto_aead_ctx(conn, &conn->early.ckm->aead_ctx); + } + conn_call_delete_crypto_cipher_ctx(conn, &conn->early.hp_ctx); + + if (conn->crypto.key_update.old_rx_ckm) { + conn_call_delete_crypto_aead_ctx( + conn, &conn->crypto.key_update.old_rx_ckm->aead_ctx); + } + if (conn->crypto.key_update.new_rx_ckm) { + conn_call_delete_crypto_aead_ctx( + conn, &conn->crypto.key_update.new_rx_ckm->aead_ctx); + } + if (conn->crypto.key_update.new_tx_ckm) { + conn_call_delete_crypto_aead_ctx( + conn, &conn->crypto.key_update.new_tx_ckm->aead_ctx); + } + + if (conn->pktns.crypto.rx.ckm) { + conn_call_delete_crypto_aead_ctx(conn, + &conn->pktns.crypto.rx.ckm->aead_ctx); + } + conn_call_delete_crypto_cipher_ctx(conn, &conn->pktns.crypto.rx.hp_ctx); + + if (conn->pktns.crypto.tx.ckm) { + conn_call_delete_crypto_aead_ctx(conn, + &conn->pktns.crypto.tx.ckm->aead_ctx); + } + conn_call_delete_crypto_cipher_ctx(conn, &conn->pktns.crypto.tx.hp_ctx); + + if (conn->hs_pktns) { + if (conn->hs_pktns->crypto.rx.ckm) { + conn_call_delete_crypto_aead_ctx( + conn, &conn->hs_pktns->crypto.rx.ckm->aead_ctx); + } + conn_call_delete_crypto_cipher_ctx(conn, &conn->hs_pktns->crypto.rx.hp_ctx); + + if (conn->hs_pktns->crypto.tx.ckm) { + conn_call_delete_crypto_aead_ctx( + conn, &conn->hs_pktns->crypto.tx.ckm->aead_ctx); + } + conn_call_delete_crypto_cipher_ctx(conn, &conn->hs_pktns->crypto.tx.hp_ctx); + } + if (conn->in_pktns) { + if (conn->in_pktns->crypto.rx.ckm) { + conn_call_delete_crypto_aead_ctx( + conn, &conn->in_pktns->crypto.rx.ckm->aead_ctx); + } + conn_call_delete_crypto_cipher_ctx(conn, &conn->in_pktns->crypto.rx.hp_ctx); + + if (conn->in_pktns->crypto.tx.ckm) { + conn_call_delete_crypto_aead_ctx( + conn, &conn->in_pktns->crypto.tx.ckm->aead_ctx); + } + conn_call_delete_crypto_cipher_ctx(conn, &conn->in_pktns->crypto.tx.hp_ctx); + } + + conn_call_delete_crypto_aead_ctx(conn, &conn->crypto.retry_aead_ctx); + + ngtcp2_transport_params_del(conn->remote.transport_params, conn->mem); + ngtcp2_transport_params_del(conn->remote.pending_transport_params, conn->mem); + + conn_vneg_crypto_free(conn); + + ngtcp2_mem_free(conn->mem, conn->vneg.preferred_versions); + if (conn->vneg.available_versions != server_default_available_versions) { + ngtcp2_mem_free(conn->mem, conn->vneg.available_versions); + } + + ngtcp2_mem_free(conn->mem, conn->crypto.decrypt_buf.base); + ngtcp2_mem_free(conn->mem, conn->crypto.decrypt_hp_buf.base); + ngtcp2_mem_free(conn->mem, (uint8_t *)conn->local.settings.token); + + ngtcp2_crypto_km_del(conn->crypto.key_update.old_rx_ckm, conn->mem); + ngtcp2_crypto_km_del(conn->crypto.key_update.new_rx_ckm, conn->mem); + ngtcp2_crypto_km_del(conn->crypto.key_update.new_tx_ckm, conn->mem); + ngtcp2_crypto_km_del(conn->early.ckm, conn->mem); + + pktns_free(&conn->pktns, conn->mem); + pktns_del(conn->hs_pktns, conn->mem); + pktns_del(conn->in_pktns, conn->mem); + + cc_del(&conn->cc, conn->cc_algo, conn->mem); + + ngtcp2_mem_free(conn->mem, conn->qlog.buf.begin); + + ngtcp2_pmtud_del(conn->pmtud); + ngtcp2_pv_del(conn->pv); + + ngtcp2_mem_free(conn->mem, (uint8_t *)conn->rx.ccerr.reason); + + ngtcp2_idtr_free(&conn->remote.uni.idtr); + ngtcp2_idtr_free(&conn->remote.bidi.idtr); + ngtcp2_mem_free(conn->mem, conn->tx.ack); + ngtcp2_pq_free(&conn->tx.strmq); + ngtcp2_map_each_free(&conn->strms, delete_strms_each, (void *)conn); + ngtcp2_map_free(&conn->strms); + + ngtcp2_pq_free(&conn->scid.used); + delete_scid(&conn->scid.set, conn->mem); + ngtcp2_ksl_free(&conn->scid.set); + ngtcp2_gaptr_free(&conn->dcid.seqgap); + + ngtcp2_objalloc_free(&conn->strm_objalloc); + ngtcp2_objalloc_free(&conn->rtb_entry_objalloc); + ngtcp2_objalloc_free(&conn->frc_objalloc); + + ngtcp2_mem_free(conn->mem, conn); +} + +/* + * conn_ensure_ack_ranges makes sure that conn->tx.ack->ack.ranges can + * contain at least |n| additional ngtcp2_ack_range. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +static int conn_ensure_ack_ranges(ngtcp2_conn *conn, size_t n) { + ngtcp2_frame *fr; + size_t max = conn->tx.max_ack_ranges; + + if (n <= max) { + return 0; + } + + max *= 2; + + assert(max >= n); + + fr = ngtcp2_mem_realloc(conn->mem, conn->tx.ack, + sizeof(ngtcp2_ack) + sizeof(ngtcp2_ack_range) * max); + if (fr == NULL) { + return NGTCP2_ERR_NOMEM; + } + + conn->tx.ack = fr; + conn->tx.max_ack_ranges = max; + + return 0; +} + +/* + * conn_compute_ack_delay computes ACK delay for outgoing protected + * ACK. + */ +static ngtcp2_duration conn_compute_ack_delay(ngtcp2_conn *conn) { + return ngtcp2_min(conn->local.transport_params.max_ack_delay, + conn->cstat.smoothed_rtt / 8); +} + +/* + * conn_create_ack_frame creates ACK frame, and assigns its pointer to + * |*pfr| if there are any received packets to acknowledge. If there + * are no packets to acknowledge, this function returns 0, and |*pfr| + * is untouched. The caller is advised to set |*pfr| to NULL before + * calling this function, and check it after this function returns. + * If |nodelay| is nonzero, delayed ACK timer is ignored. + * + * The memory for ACK frame is dynamically allocated by this function. + * A caller is responsible to free it. + * + * Call ngtcp2_acktr_commit_ack after a created ACK frame is + * successfully serialized into a packet. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +static int conn_create_ack_frame(ngtcp2_conn *conn, ngtcp2_frame **pfr, + ngtcp2_pktns *pktns, uint8_t type, + ngtcp2_tstamp ts, ngtcp2_duration ack_delay, + uint64_t ack_delay_exponent) { + /* TODO Measure an actual size of ACK blocks to find the best + default value. */ + const size_t initial_max_ack_ranges = 8; + int64_t last_pkt_num; + ngtcp2_acktr *acktr = &pktns->acktr; + ngtcp2_ack_range *range; + ngtcp2_ksl_it it; + ngtcp2_acktr_entry *rpkt; + ngtcp2_ack *ack; + size_t range_idx; + ngtcp2_tstamp largest_ack_ts; + int rv; + + if (acktr->flags & NGTCP2_ACKTR_FLAG_IMMEDIATE_ACK) { + ack_delay = 0; + } + + if (!ngtcp2_acktr_require_active_ack(acktr, ack_delay, ts)) { + return 0; + } + + it = ngtcp2_acktr_get(acktr); + if (ngtcp2_ksl_it_end(&it)) { + ngtcp2_acktr_commit_ack(acktr); + return 0; + } + + if (conn->tx.ack == NULL) { + conn->tx.ack = ngtcp2_mem_malloc( + conn->mem, + sizeof(ngtcp2_ack) + sizeof(ngtcp2_ack_range) * initial_max_ack_ranges); + if (conn->tx.ack == NULL) { + return NGTCP2_ERR_NOMEM; + } + conn->tx.max_ack_ranges = initial_max_ack_ranges; + } + + ack = &conn->tx.ack->ack; + + if (pktns->rx.ecn.ect0 || pktns->rx.ecn.ect1 || pktns->rx.ecn.ce) { + ack->type = NGTCP2_FRAME_ACK_ECN; + ack->ecn.ect0 = pktns->rx.ecn.ect0; + ack->ecn.ect1 = pktns->rx.ecn.ect1; + ack->ecn.ce = pktns->rx.ecn.ce; + } else { + ack->type = NGTCP2_FRAME_ACK; + } + ack->rangecnt = 0; + + rpkt = ngtcp2_ksl_it_get(&it); + + if (rpkt->pkt_num == pktns->rx.max_pkt_num) { + last_pkt_num = rpkt->pkt_num - (int64_t)(rpkt->len - 1); + largest_ack_ts = rpkt->tstamp; + ack->largest_ack = rpkt->pkt_num; + ack->first_ack_range = rpkt->len - 1; + + ngtcp2_ksl_it_next(&it); + } else { + assert(rpkt->pkt_num < pktns->rx.max_pkt_num); + + last_pkt_num = pktns->rx.max_pkt_num; + largest_ack_ts = pktns->rx.max_pkt_ts; + ack->largest_ack = pktns->rx.max_pkt_num; + ack->first_ack_range = 0; + } + + if (type == NGTCP2_PKT_1RTT) { + ack->ack_delay_unscaled = ts - largest_ack_ts; + ack->ack_delay = ack->ack_delay_unscaled / NGTCP2_MICROSECONDS / + (1ULL << ack_delay_exponent); + } else { + ack->ack_delay_unscaled = 0; + ack->ack_delay = 0; + } + + for (; !ngtcp2_ksl_it_end(&it); ngtcp2_ksl_it_next(&it)) { + if (ack->rangecnt == NGTCP2_MAX_ACK_RANGES) { + break; + } + + rpkt = ngtcp2_ksl_it_get(&it); + + range_idx = ack->rangecnt++; + rv = conn_ensure_ack_ranges(conn, ack->rangecnt); + if (rv != 0) { + return rv; + } + ack = &conn->tx.ack->ack; + range = &ack->ranges[range_idx]; + range->gap = (uint64_t)(last_pkt_num - rpkt->pkt_num - 2); + range->len = rpkt->len - 1; + + last_pkt_num = rpkt->pkt_num - (int64_t)(rpkt->len - 1); + } + + /* TODO Just remove entries which cannot fit into a single ACK frame + for now. */ + if (!ngtcp2_ksl_it_end(&it)) { + ngtcp2_acktr_forget(acktr, ngtcp2_ksl_it_get(&it)); + } + + *pfr = conn->tx.ack; + + return 0; +} + +/* + * conn_ppe_write_frame writes |fr| to |ppe|. If |hd_logged| is not + * NULL and |*hd_logged| is zero, packet header is logged, and 1 is + * assigned to |*hd_logged|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOBUF + * Buffer is too small. + */ +static int conn_ppe_write_frame_hd_log(ngtcp2_conn *conn, ngtcp2_ppe *ppe, + int *hd_logged, const ngtcp2_pkt_hd *hd, + ngtcp2_frame *fr) { + int rv; + + rv = ngtcp2_ppe_encode_frame(ppe, fr); + if (rv != 0) { + assert(NGTCP2_ERR_NOBUF == rv); + return rv; + } + + if (hd_logged && !*hd_logged) { + *hd_logged = 1; + ngtcp2_log_tx_pkt_hd(&conn->log, hd); + ngtcp2_qlog_pkt_sent_start(&conn->qlog); + } + + ngtcp2_log_tx_fr(&conn->log, hd, fr); + ngtcp2_qlog_write_frame(&conn->qlog, fr); + + return 0; +} + +/* + * conn_ppe_write_frame writes |fr| to |ppe|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOBUF + * Buffer is too small. + */ +static int conn_ppe_write_frame(ngtcp2_conn *conn, ngtcp2_ppe *ppe, + const ngtcp2_pkt_hd *hd, ngtcp2_frame *fr) { + return conn_ppe_write_frame_hd_log(conn, ppe, NULL, hd, fr); +} + +/* + * conn_on_pkt_sent is called when new non-ACK-only packet is sent. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory + */ +static int conn_on_pkt_sent(ngtcp2_conn *conn, ngtcp2_rtb *rtb, + ngtcp2_rtb_entry *ent) { + int rv; + + /* This function implements OnPacketSent, but it handles only + non-ACK-only packet. */ + rv = ngtcp2_rtb_add(rtb, ent, &conn->cstat); + if (rv != 0) { + return rv; + } + + if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) { + conn->cstat.last_tx_pkt_ts[rtb->pktns_id] = ent->ts; + } + + ngtcp2_conn_set_loss_detection_timer(conn, ent->ts); + + return 0; +} + +/* + * pktns_select_pkt_numlen selects shortest packet number encoding for + * the next packet number based on the largest acknowledged packet + * number. It returns the number of bytes to encode the packet + * number. + */ +static size_t pktns_select_pkt_numlen(ngtcp2_pktns *pktns) { + int64_t pkt_num = pktns->tx.last_pkt_num + 1; + ngtcp2_rtb *rtb = &pktns->rtb; + int64_t n = pkt_num - rtb->largest_acked_tx_pkt_num; + + if (NGTCP2_MAX_PKT_NUM / 2 < n) { + return 4; + } + + n = n * 2 - 1; + + if (n > 0xffffff) { + return 4; + } + if (n > 0xffff) { + return 3; + } + if (n > 0xff) { + return 2; + } + return 1; +} + +/* + * conn_get_cwnd returns cwnd for the current path. + */ +static uint64_t conn_get_cwnd(ngtcp2_conn *conn) { + return conn->pv && (conn->pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE) + ? ngtcp2_cc_compute_initcwnd(conn->cstat.max_tx_udp_payload_size) + : conn->cstat.cwnd; +} + +/* + * conn_cwnd_is_zero returns nonzero if the number of bytes the local + * endpoint can sent at this time is zero. + */ +static int conn_cwnd_is_zero(ngtcp2_conn *conn) { + uint64_t bytes_in_flight = conn->cstat.bytes_in_flight; + uint64_t cwnd = conn_get_cwnd(conn); + + if (bytes_in_flight >= cwnd) { + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_RCV, + "cwnd limited bytes_in_flight=%lu cwnd=%lu", + bytes_in_flight, cwnd); + } + + return bytes_in_flight >= cwnd; +} + +/* + * conn_retry_early_payloadlen returns the estimated wire length of + * the first STREAM frame of 0-RTT packet which should be + * retransmitted due to Retry packet. + */ +static uint64_t conn_retry_early_payloadlen(ngtcp2_conn *conn) { + ngtcp2_frame_chain *frc; + ngtcp2_strm *strm; + uint64_t len; + + if (conn->flags & NGTCP2_CONN_FLAG_EARLY_DATA_REJECTED) { + return 0; + } + + for (; !ngtcp2_pq_empty(&conn->tx.strmq);) { + strm = ngtcp2_conn_tx_strmq_top(conn); + if (ngtcp2_strm_streamfrq_empty(strm)) { + ngtcp2_conn_tx_strmq_pop(conn); + continue; + } + + frc = ngtcp2_strm_streamfrq_top(strm); + + len = ngtcp2_vec_len(frc->fr.stream.data, frc->fr.stream.datacnt) + + NGTCP2_STREAM_OVERHEAD; + + /* Take the min because in conn_should_pad_pkt we take max in + order to deal with unbreakable DATAGRAM. */ + return ngtcp2_min(len, NGTCP2_MIN_COALESCED_PAYLOADLEN); + } + + return 0; +} + +static void conn_cryptofrq_clear(ngtcp2_conn *conn, ngtcp2_pktns *pktns) { + ngtcp2_frame_chain *frc; + ngtcp2_ksl_it it; + + for (it = ngtcp2_ksl_begin(&pktns->crypto.tx.frq); !ngtcp2_ksl_it_end(&it); + ngtcp2_ksl_it_next(&it)) { + frc = ngtcp2_ksl_it_get(&it); + ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem); + } + ngtcp2_ksl_clear(&pktns->crypto.tx.frq); +} + +/* + * conn_cryptofrq_unacked_offset returns the CRYPTO frame offset by + * taking into account acknowledged offset. If there is no data to + * send, this function returns (uint64_t)-1. + */ +static uint64_t conn_cryptofrq_unacked_offset(ngtcp2_conn *conn, + ngtcp2_pktns *pktns) { + ngtcp2_frame_chain *frc; + ngtcp2_crypto *fr; + ngtcp2_range gap; + ngtcp2_rtb *rtb = &pktns->rtb; + ngtcp2_ksl_it it; + uint64_t datalen; + + (void)conn; + + for (it = ngtcp2_ksl_begin(&pktns->crypto.tx.frq); !ngtcp2_ksl_it_end(&it); + ngtcp2_ksl_it_next(&it)) { + frc = ngtcp2_ksl_it_get(&it); + fr = &frc->fr.crypto; + + gap = ngtcp2_strm_get_unacked_range_after(rtb->crypto, fr->offset); + + datalen = ngtcp2_vec_len(fr->data, fr->datacnt); + + if (gap.begin <= fr->offset) { + return fr->offset; + } + if (gap.begin < fr->offset + datalen) { + return gap.begin; + } + } + + return (uint64_t)-1; +} + +static int conn_cryptofrq_unacked_pop(ngtcp2_conn *conn, ngtcp2_pktns *pktns, + ngtcp2_frame_chain **pfrc) { + ngtcp2_frame_chain *frc, *nfrc; + ngtcp2_crypto *fr, *nfr; + uint64_t offset, end_offset; + size_t idx, end_idx; + uint64_t base_offset, end_base_offset; + ngtcp2_range gap; + ngtcp2_rtb *rtb = &pktns->rtb; + ngtcp2_vec *v; + int rv; + ngtcp2_ksl_it it; + + *pfrc = NULL; + + for (it = ngtcp2_ksl_begin(&pktns->crypto.tx.frq); !ngtcp2_ksl_it_end(&it);) { + frc = ngtcp2_ksl_it_get(&it); + fr = &frc->fr.crypto; + + ngtcp2_ksl_remove_hint(&pktns->crypto.tx.frq, &it, &it, &fr->offset); + + idx = 0; + offset = fr->offset; + base_offset = 0; + + gap = ngtcp2_strm_get_unacked_range_after(rtb->crypto, offset); + if (gap.begin < offset) { + gap.begin = offset; + } + + for (; idx < fr->datacnt && offset < gap.begin; ++idx) { + v = &fr->data[idx]; + if (offset + v->len > gap.begin) { + base_offset = gap.begin - offset; + break; + } + + offset += v->len; + } + + if (idx == fr->datacnt) { + ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem); + continue; + } + + assert(gap.begin == offset + base_offset); + + end_idx = idx; + end_offset = offset; + end_base_offset = 0; + + for (; end_idx < fr->datacnt; ++end_idx) { + v = &fr->data[end_idx]; + if (end_offset + v->len > gap.end) { + end_base_offset = gap.end - end_offset; + break; + } + + end_offset += v->len; + } + + if (fr->offset == offset && base_offset == 0 && fr->datacnt == end_idx) { + *pfrc = frc; + return 0; + } + + if (fr->datacnt == end_idx) { + memmove(fr->data, fr->data + idx, sizeof(fr->data[0]) * (end_idx - idx)); + + assert(fr->data[0].len > base_offset); + + fr->offset = offset + base_offset; + fr->datacnt = end_idx - idx; + fr->data[0].base += base_offset; + fr->data[0].len -= (size_t)base_offset; + + *pfrc = frc; + return 0; + } + + rv = ngtcp2_frame_chain_crypto_datacnt_objalloc_new( + &nfrc, fr->datacnt - end_idx, &conn->frc_objalloc, conn->mem); + if (rv != 0) { + ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem); + return rv; + } + + nfr = &nfrc->fr.crypto; + nfr->type = NGTCP2_FRAME_CRYPTO; + memcpy(nfr->data, fr->data + end_idx, + sizeof(nfr->data[0]) * (fr->datacnt - end_idx)); + + assert(nfr->data[0].len > end_base_offset); + + nfr->offset = end_offset + end_base_offset; + nfr->datacnt = fr->datacnt - end_idx; + nfr->data[0].base += end_base_offset; + nfr->data[0].len -= (size_t)end_base_offset; + + rv = ngtcp2_ksl_insert(&pktns->crypto.tx.frq, NULL, &nfr->offset, nfrc); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + ngtcp2_frame_chain_objalloc_del(nfrc, &conn->frc_objalloc, conn->mem); + ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem); + return rv; + } + + if (end_base_offset) { + ++end_idx; + } + + memmove(fr->data, fr->data + idx, sizeof(fr->data[0]) * (end_idx - idx)); + + assert(fr->data[0].len > base_offset); + + fr->offset = offset + base_offset; + fr->datacnt = end_idx - idx; + if (end_base_offset) { + assert(fr->data[fr->datacnt - 1].len > end_base_offset); + fr->data[fr->datacnt - 1].len = (size_t)end_base_offset; + } + fr->data[0].base += base_offset; + fr->data[0].len -= (size_t)base_offset; + + *pfrc = frc; + return 0; + } + + return 0; +} +static int conn_cryptofrq_pop(ngtcp2_conn *conn, ngtcp2_frame_chain **pfrc, + ngtcp2_pktns *pktns, size_t left) { + ngtcp2_crypto *fr, *nfr; + ngtcp2_frame_chain *frc, *nfrc; + int rv; + size_t nmerged; + uint64_t datalen; + ngtcp2_vec a[NGTCP2_MAX_CRYPTO_DATACNT]; + ngtcp2_vec b[NGTCP2_MAX_CRYPTO_DATACNT]; + size_t acnt, bcnt; + ngtcp2_ksl_it it; + + rv = conn_cryptofrq_unacked_pop(conn, pktns, &frc); + if (rv != 0) { + return rv; + } + if (frc == NULL) { + *pfrc = NULL; + return 0; + } + + fr = &frc->fr.crypto; + datalen = ngtcp2_vec_len(fr->data, fr->datacnt); + + if (datalen > left) { + ngtcp2_vec_copy(a, fr->data, fr->datacnt); + acnt = fr->datacnt; + + bcnt = 0; + ngtcp2_vec_split(a, &acnt, b, &bcnt, left, NGTCP2_MAX_CRYPTO_DATACNT); + + assert(acnt > 0); + assert(bcnt > 0); + + rv = ngtcp2_frame_chain_crypto_datacnt_objalloc_new( + &nfrc, bcnt, &conn->frc_objalloc, conn->mem); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem); + return rv; + } + + nfr = &nfrc->fr.crypto; + nfr->type = NGTCP2_FRAME_CRYPTO; + nfr->offset = fr->offset + left; + nfr->datacnt = bcnt; + ngtcp2_vec_copy(nfr->data, b, bcnt); + + rv = ngtcp2_ksl_insert(&pktns->crypto.tx.frq, NULL, &nfr->offset, nfrc); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + ngtcp2_frame_chain_objalloc_del(nfrc, &conn->frc_objalloc, conn->mem); + ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem); + return rv; + } + + rv = ngtcp2_frame_chain_crypto_datacnt_objalloc_new( + &nfrc, acnt, &conn->frc_objalloc, conn->mem); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem); + return rv; + } + + nfr = &nfrc->fr.crypto; + *nfr = *fr; + nfr->datacnt = acnt; + ngtcp2_vec_copy(nfr->data, a, acnt); + + ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem); + + *pfrc = nfrc; + + return 0; + } + + left -= (size_t)datalen; + + ngtcp2_vec_copy(a, fr->data, fr->datacnt); + acnt = fr->datacnt; + + for (; left && ngtcp2_ksl_len(&pktns->crypto.tx.frq);) { + it = ngtcp2_ksl_begin(&pktns->crypto.tx.frq); + nfrc = ngtcp2_ksl_it_get(&it); + nfr = &nfrc->fr.crypto; + + if (nfr->offset != fr->offset + datalen) { + assert(fr->offset + datalen < nfr->offset); + break; + } + + rv = conn_cryptofrq_unacked_pop(conn, pktns, &nfrc); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem); + return rv; + } + if (nfrc == NULL) { + break; + } + + nfr = &nfrc->fr.crypto; + + nmerged = ngtcp2_vec_merge(a, &acnt, nfr->data, &nfr->datacnt, left, + NGTCP2_MAX_CRYPTO_DATACNT); + if (nmerged == 0) { + rv = ngtcp2_ksl_insert(&pktns->crypto.tx.frq, NULL, &nfr->offset, nfrc); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + ngtcp2_frame_chain_objalloc_del(nfrc, &conn->frc_objalloc, conn->mem); + ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem); + return rv; + } + break; + } + + datalen += nmerged; + left -= nmerged; + + if (nfr->datacnt == 0) { + ngtcp2_frame_chain_objalloc_del(nfrc, &conn->frc_objalloc, conn->mem); + continue; + } + + nfr->offset += nmerged; + + rv = ngtcp2_ksl_insert(&pktns->crypto.tx.frq, NULL, &nfr->offset, nfrc); + if (rv != 0) { + ngtcp2_frame_chain_objalloc_del(nfrc, &conn->frc_objalloc, conn->mem); + ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem); + return rv; + } + + break; + } + + if (acnt == fr->datacnt) { + assert(acnt > 0); + fr->data[acnt - 1] = a[acnt - 1]; + + *pfrc = frc; + return 0; + } + + assert(acnt > fr->datacnt); + + rv = ngtcp2_frame_chain_crypto_datacnt_objalloc_new( + &nfrc, acnt, &conn->frc_objalloc, conn->mem); + if (rv != 0) { + ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem); + return rv; + } + + nfr = &nfrc->fr.crypto; + *nfr = *fr; + nfr->datacnt = acnt; + ngtcp2_vec_copy(nfr->data, a, acnt); + + ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem); + + *pfrc = nfrc; + + return 0; +} + +/* + * conn_verify_dcid verifies that destination connection ID in |hd| is + * valid for the connection. If it is successfully verified and the + * remote endpoint uses new DCID in the packet, nonzero value is + * assigned to |*pnew_cid_used| if it is not NULL. Otherwise 0 is + * assigned to it. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + * NGTCP2_ERR_INVALID_ARGUMENT + * |dcid| is not known to the local endpoint. + */ +static int conn_verify_dcid(ngtcp2_conn *conn, int *pnew_cid_used, + const ngtcp2_pkt_hd *hd) { + ngtcp2_ksl_it it; + ngtcp2_scid *scid; + int rv; + + it = ngtcp2_ksl_lower_bound(&conn->scid.set, &hd->dcid); + if (ngtcp2_ksl_it_end(&it)) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + scid = ngtcp2_ksl_it_get(&it); + if (!ngtcp2_cid_eq(&scid->cid, &hd->dcid)) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + if (!(scid->flags & NGTCP2_SCID_FLAG_USED)) { + scid->flags |= NGTCP2_SCID_FLAG_USED; + + if (scid->pe.index == NGTCP2_PQ_BAD_INDEX) { + rv = ngtcp2_pq_push(&conn->scid.used, &scid->pe); + if (rv != 0) { + return rv; + } + } + + if (pnew_cid_used) { + *pnew_cid_used = 1; + } + } else if (pnew_cid_used) { + *pnew_cid_used = 0; + } + + return 0; +} + +/* + * conn_should_pad_pkt returns nonzero if the packet should be padded. + * |type| is the type of packet. |left| is the space left in packet + * buffer. |write_datalen| is the number of bytes which will be sent + * in the next, coalesced 0-RTT or 1RTT packet. + */ +static int conn_should_pad_pkt(ngtcp2_conn *conn, uint8_t type, size_t left, + uint64_t write_datalen, int ack_eliciting, + int require_padding) { + uint64_t min_payloadlen; + + if (type == NGTCP2_PKT_INITIAL) { + if (conn->server) { + if (!ack_eliciting) { + return 0; + } + + if (conn->hs_pktns->crypto.tx.ckm && + (conn->hs_pktns->rtb.probe_pkt_left || + ngtcp2_ksl_len(&conn->hs_pktns->crypto.tx.frq) || + !ngtcp2_acktr_empty(&conn->hs_pktns->acktr))) { + /* If we have something to send in Handshake packet, then add + PADDING in Handshake packet. */ + min_payloadlen = NGTCP2_MIN_COALESCED_PAYLOADLEN; + } else { + return 1; + } + } else { + if (conn->hs_pktns->crypto.tx.ckm && + (conn->hs_pktns->rtb.probe_pkt_left || + ngtcp2_ksl_len(&conn->hs_pktns->crypto.tx.frq) || + !ngtcp2_acktr_empty(&conn->hs_pktns->acktr))) { + /* If we have something to send in Handshake packet, then add + PADDING in Handshake packet. */ + min_payloadlen = NGTCP2_MIN_COALESCED_PAYLOADLEN; + } else if ((!conn->early.ckm && !conn->pktns.crypto.tx.ckm) || + write_datalen == 0) { + return 1; + } else { + /* If we have something to send in 0RTT or 1RTT packet, then + add PADDING in that packet. Take maximum in case that + write_datalen includes DATAGRAM which cannot be split. */ + min_payloadlen = + ngtcp2_max(write_datalen, NGTCP2_MIN_COALESCED_PAYLOADLEN); + } + } + } else { + assert(type == NGTCP2_PKT_HANDSHAKE); + + if (!require_padding) { + return 0; + } + + if (!conn->pktns.crypto.tx.ckm || write_datalen == 0) { + return 1; + } + + min_payloadlen = ngtcp2_max(write_datalen, NGTCP2_MIN_COALESCED_PAYLOADLEN); + } + + /* TODO the next packet type should be taken into account */ + return left < + /* TODO Assuming that pkt_num is encoded in 1 byte. */ + NGTCP2_MIN_LONG_HEADERLEN + conn->dcid.current.cid.datalen + + conn->oscid.datalen + NGTCP2_PKT_LENGTHLEN - 1 + min_payloadlen + + NGTCP2_MAX_AEAD_OVERHEAD; +} + +static void conn_restart_timer_on_write(ngtcp2_conn *conn, ngtcp2_tstamp ts) { + conn->idle_ts = ts; + conn->flags &= (uint32_t)~NGTCP2_CONN_FLAG_RESTART_IDLE_TIMER_ON_WRITE; +} + +static void conn_restart_timer_on_read(ngtcp2_conn *conn, ngtcp2_tstamp ts) { + conn->idle_ts = ts; + conn->flags |= NGTCP2_CONN_FLAG_RESTART_IDLE_TIMER_ON_WRITE; +} + +/* + * conn_keep_alive_enabled returns nonzero if keep-alive is enabled. + */ +static int conn_keep_alive_enabled(ngtcp2_conn *conn) { + return conn->keep_alive.last_ts != UINT64_MAX && conn->keep_alive.timeout; +} + +/* + * conn_keep_alive_expired returns nonzero if keep-alive timer has + * expired. + */ +static int conn_keep_alive_expired(ngtcp2_conn *conn, ngtcp2_tstamp ts) { + return conn_keep_alive_enabled(conn) && + conn->keep_alive.last_ts + conn->keep_alive.timeout <= ts; +} + +/* + * conn_keep_alive_expiry returns the expiry time of keep-alive timer. + */ +static ngtcp2_tstamp conn_keep_alive_expiry(ngtcp2_conn *conn) { + if ((conn->flags & NGTCP2_CONN_FLAG_KEEP_ALIVE_CANCELLED) || + !conn_keep_alive_enabled(conn)) { + return UINT64_MAX; + } + + return conn->keep_alive.last_ts + conn->keep_alive.timeout; +} + +/* + * conn_cancel_expired_keep_alive_timer cancels the expired keep-alive + * timer. + */ +static void conn_cancel_expired_keep_alive_timer(ngtcp2_conn *conn, + ngtcp2_tstamp ts) { + if (!(conn->flags & NGTCP2_CONN_FLAG_KEEP_ALIVE_CANCELLED) && + conn_keep_alive_expired(conn, ts)) { + conn->flags |= NGTCP2_CONN_FLAG_KEEP_ALIVE_CANCELLED; + } +} + +/* + * conn_update_keep_alive_last_ts updates the base time point of + * keep-alive timer. + */ +static void conn_update_keep_alive_last_ts(ngtcp2_conn *conn, + ngtcp2_tstamp ts) { + conn->keep_alive.last_ts = ts; + conn->flags &= (uint32_t)~NGTCP2_CONN_FLAG_KEEP_ALIVE_CANCELLED; +} + +void ngtcp2_conn_set_keep_alive_timeout(ngtcp2_conn *conn, + ngtcp2_duration timeout) { + conn->keep_alive.timeout = timeout; +} + +/* + * NGTCP2_PKT_PACING_OVERHEAD defines overhead of userspace event + * loop. Packet pacing might require sub milliseconds packet spacing, + * but userspace event loop might not offer such precision. + * Typically, if delay is 0.5 microseconds, the actual delay after + * which we can send packet is well over 1 millisecond when event loop + * is involved (which includes other stuff, like reading packets etc + * in a typical single threaded use case). + */ +#define NGTCP2_PKT_PACING_OVERHEAD NGTCP2_MILLISECONDS + +static void conn_cancel_expired_pkt_tx_timer(ngtcp2_conn *conn, + ngtcp2_tstamp ts) { + if (conn->tx.pacing.next_ts == UINT64_MAX) { + return; + } + + if (conn->tx.pacing.next_ts > ts + NGTCP2_PKT_PACING_OVERHEAD) { + return; + } + + conn->tx.pacing.next_ts = UINT64_MAX; +} + +static int conn_pacing_pkt_tx_allowed(ngtcp2_conn *conn, ngtcp2_tstamp ts) { + return conn->tx.pacing.next_ts == UINT64_MAX || + conn->tx.pacing.next_ts <= ts + NGTCP2_PKT_PACING_OVERHEAD; +} + +static uint8_t conn_pkt_flags(ngtcp2_conn *conn) { + if (conn->remote.transport_params && + conn->remote.transport_params->grease_quic_bit && + (conn->flags & NGTCP2_CONN_FLAG_CLEAR_FIXED_BIT)) { + return NGTCP2_PKT_FLAG_FIXED_BIT_CLEAR; + } + + return NGTCP2_PKT_FLAG_NONE; +} + +static uint8_t conn_pkt_flags_long(ngtcp2_conn *conn) { + return NGTCP2_PKT_FLAG_LONG_FORM | conn_pkt_flags(conn); +} + +static uint8_t conn_pkt_flags_short(ngtcp2_conn *conn) { + return (uint8_t)(conn_pkt_flags(conn) | ((conn->pktns.crypto.tx.ckm->flags & + NGTCP2_CRYPTO_KM_FLAG_KEY_PHASE_ONE) + ? NGTCP2_PKT_FLAG_KEY_PHASE + : NGTCP2_PKT_FLAG_NONE)); +} + +/* + * conn_write_handshake_pkt writes handshake packet in the buffer + * pointed by |dest| whose length is |destlen|. |type| specifies long + * packet type. It should be either NGTCP2_PKT_INITIAL or + * NGTCP2_PKT_HANDSHAKE_PKT. + * + * |write_datalen| is the minimum length of application data ready to + * send in subsequent 0RTT or 1RTT packet. + * + * This function returns the number of bytes written in |dest| if it + * succeeds, or one of the following negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + * NGTCP2_ERR_CALLBACK_FAILURE + * User-defined callback function failed. + */ +static ngtcp2_ssize +conn_write_handshake_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, uint8_t *dest, + size_t destlen, uint8_t type, uint8_t flags, + uint64_t write_datalen, ngtcp2_tstamp ts) { + int rv; + ngtcp2_ppe ppe; + ngtcp2_pkt_hd hd; + ngtcp2_frame_chain *frq = NULL, **pfrc = &frq; + ngtcp2_frame_chain *nfrc; + ngtcp2_frame *ackfr = NULL, lfr; + ngtcp2_ssize spktlen; + ngtcp2_crypto_cc cc; + ngtcp2_rtb_entry *rtbent; + ngtcp2_pktns *pktns; + size_t left; + uint16_t rtb_entry_flags = NGTCP2_RTB_ENTRY_FLAG_NONE; + int require_padding = (flags & NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING) != 0; + int pkt_empty = 1; + int padded = 0; + int hd_logged = 0; + uint64_t crypto_offset; + ngtcp2_ssize num_reclaimed; + uint32_t version; + + switch (type) { + case NGTCP2_PKT_INITIAL: + if (!conn->in_pktns) { + return 0; + } + assert(conn->in_pktns->crypto.tx.ckm); + pktns = conn->in_pktns; + version = conn->negotiated_version ? conn->negotiated_version + : conn->client_chosen_version; + if (version == conn->client_chosen_version) { + cc.ckm = pktns->crypto.tx.ckm; + cc.hp_ctx = pktns->crypto.tx.hp_ctx; + } else { + assert(conn->vneg.version == version); + + cc.ckm = conn->vneg.tx.ckm; + cc.hp_ctx = conn->vneg.tx.hp_ctx; + } + break; + case NGTCP2_PKT_HANDSHAKE: + if (!conn->hs_pktns || !conn->hs_pktns->crypto.tx.ckm) { + return 0; + } + pktns = conn->hs_pktns; + version = conn->negotiated_version; + cc.ckm = pktns->crypto.tx.ckm; + cc.hp_ctx = pktns->crypto.tx.hp_ctx; + break; + default: + ngtcp2_unreachable(); + } + + cc.aead = pktns->crypto.ctx.aead; + cc.hp = pktns->crypto.ctx.hp; + cc.encrypt = conn->callbacks.encrypt; + cc.hp_mask = conn->callbacks.hp_mask; + + ngtcp2_pkt_hd_init(&hd, conn_pkt_flags_long(conn), type, + &conn->dcid.current.cid, &conn->oscid, + pktns->tx.last_pkt_num + 1, pktns_select_pkt_numlen(pktns), + version, 0); + + if (!conn->server && type == NGTCP2_PKT_INITIAL && + conn->local.settings.tokenlen) { + hd.token = conn->local.settings.token; + hd.tokenlen = conn->local.settings.tokenlen; + } + + ngtcp2_ppe_init(&ppe, dest, destlen, &cc); + + rv = ngtcp2_ppe_encode_hd(&ppe, &hd); + if (rv != 0) { + assert(NGTCP2_ERR_NOBUF == rv); + return 0; + } + + if (!ngtcp2_ppe_ensure_hp_sample(&ppe)) { + return 0; + } + + rv = conn_create_ack_frame(conn, &ackfr, pktns, type, ts, + /* ack_delay = */ 0, + NGTCP2_DEFAULT_ACK_DELAY_EXPONENT); + if (rv != 0) { + ngtcp2_frame_chain_list_objalloc_del(frq, &conn->frc_objalloc, conn->mem); + return rv; + } + + if (ackfr) { + rv = conn_ppe_write_frame_hd_log(conn, &ppe, &hd_logged, &hd, ackfr); + if (rv != 0) { + assert(NGTCP2_ERR_NOBUF == rv); + } else { + ngtcp2_acktr_commit_ack(&pktns->acktr); + ngtcp2_acktr_add_ack(&pktns->acktr, hd.pkt_num, ackfr->ack.largest_ack); + pkt_empty = 0; + } + } + + /* Server requires at least NGTCP2_MAX_UDP_PAYLOAD_SIZE bytes in + order to send ack-eliciting Initial packet. */ + if (!conn->server || type != NGTCP2_PKT_INITIAL || + destlen >= NGTCP2_MAX_UDP_PAYLOAD_SIZE) { + build_pkt: + for (; ngtcp2_ksl_len(&pktns->crypto.tx.frq);) { + left = ngtcp2_ppe_left(&ppe); + + crypto_offset = conn_cryptofrq_unacked_offset(conn, pktns); + if (crypto_offset == (size_t)-1) { + conn_cryptofrq_clear(conn, pktns); + break; + } + + left = ngtcp2_pkt_crypto_max_datalen(crypto_offset, left, left); + if (left == (size_t)-1) { + break; + } + + rv = conn_cryptofrq_pop(conn, &nfrc, pktns, left); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + ngtcp2_frame_chain_list_objalloc_del(frq, &conn->frc_objalloc, + conn->mem); + return rv; + } + + if (nfrc == NULL) { + break; + } + + rv = conn_ppe_write_frame_hd_log(conn, &ppe, &hd_logged, &hd, &nfrc->fr); + if (rv != 0) { + ngtcp2_unreachable(); + } + + *pfrc = nfrc; + pfrc = &(*pfrc)->next; + + pkt_empty = 0; + rtb_entry_flags |= NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING | + NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING | + NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE; + } + + if (!(rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) && + pktns->rtb.num_retransmittable && pktns->rtb.probe_pkt_left) { + num_reclaimed = ngtcp2_rtb_reclaim_on_pto(&pktns->rtb, conn, pktns, 1); + if (num_reclaimed < 0) { + ngtcp2_frame_chain_list_objalloc_del(frq, &conn->frc_objalloc, + conn->mem); + return rv; + } + if (num_reclaimed) { + goto build_pkt; + } + /* We had pktns->rtb.num_retransmittable > 0 but the contents of + those packets have been acknowledged (i.e., retransmission in + another packet). For server, in this case, we don't have to + send any probe packet. Client needs to send probe packets + until it knows that server has completed address validation or + handshake has been confirmed. */ + if (pktns->rtb.num_pto_eliciting == 0 && + (conn->server || + (conn->flags & (NGTCP2_CONN_FLAG_SERVER_ADDR_VERIFIED | + NGTCP2_CONN_FLAG_HANDSHAKE_CONFIRMED)))) { + pktns->rtb.probe_pkt_left = 0; + ngtcp2_conn_set_loss_detection_timer(conn, ts); + /* TODO If packet is empty, we should return now if cwnd is + zero. */ + } + } + + if (!(rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) && + pktns->rtb.probe_pkt_left) { + lfr.type = NGTCP2_FRAME_PING; + + rv = conn_ppe_write_frame_hd_log(conn, &ppe, &hd_logged, &hd, &lfr); + if (rv != 0) { + assert(rv == NGTCP2_ERR_NOBUF); + } else { + rtb_entry_flags |= + NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING | NGTCP2_RTB_ENTRY_FLAG_PROBE; + pkt_empty = 0; + } + } + + if (!pkt_empty) { + if (!(rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING)) { + /* The intention of smaller limit is get more chance to measure + RTT samples in early phase. */ + if (pktns->tx.num_non_ack_pkt >= 1) { + lfr.type = NGTCP2_FRAME_PING; + + rv = conn_ppe_write_frame_hd_log(conn, &ppe, &hd_logged, &hd, &lfr); + if (rv != 0) { + assert(rv == NGTCP2_ERR_NOBUF); + } else { + rtb_entry_flags |= NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING; + pktns->tx.num_non_ack_pkt = 0; + } + } else { + ++pktns->tx.num_non_ack_pkt; + } + } else { + pktns->tx.num_non_ack_pkt = 0; + } + } + } + + if (pkt_empty) { + return 0; + } + + /* If we cannot write another packet, then we need to add padding to + Initial here. */ + if (conn_should_pad_pkt( + conn, type, ngtcp2_ppe_left(&ppe), write_datalen, + (rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) != 0, + require_padding)) { + lfr.type = NGTCP2_FRAME_PADDING; + lfr.padding.len = ngtcp2_ppe_padding(&ppe); + } else { + lfr.type = NGTCP2_FRAME_PADDING; + lfr.padding.len = ngtcp2_ppe_padding_hp_sample(&ppe); + } + + if (lfr.padding.len) { + padded = 1; + ngtcp2_log_tx_fr(&conn->log, &hd, &lfr); + ngtcp2_qlog_write_frame(&conn->qlog, &lfr); + } + + spktlen = ngtcp2_ppe_final(&ppe, NULL); + if (spktlen < 0) { + assert(ngtcp2_err_is_fatal((int)spktlen)); + ngtcp2_frame_chain_list_objalloc_del(frq, &conn->frc_objalloc, conn->mem); + return spktlen; + } + + ngtcp2_qlog_pkt_sent_end(&conn->qlog, &hd, (size_t)spktlen); + + if ((rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) || padded) { + if (pi) { + conn_handle_tx_ecn(conn, pi, &rtb_entry_flags, pktns, &hd, ts); + } + + rv = ngtcp2_rtb_entry_objalloc_new(&rtbent, &hd, frq, ts, (size_t)spktlen, + rtb_entry_flags, + &conn->rtb_entry_objalloc); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + ngtcp2_frame_chain_list_objalloc_del(frq, &conn->frc_objalloc, conn->mem); + return rv; + } + + rv = conn_on_pkt_sent(conn, &pktns->rtb, rtbent); + if (rv != 0) { + ngtcp2_rtb_entry_objalloc_del(rtbent, &conn->rtb_entry_objalloc, + &conn->frc_objalloc, conn->mem); + return rv; + } + + if ((rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) && + (conn->flags & NGTCP2_CONN_FLAG_RESTART_IDLE_TIMER_ON_WRITE)) { + conn_restart_timer_on_write(conn, ts); + } + } else if (pi && conn->tx.ecn.state == NGTCP2_ECN_STATE_CAPABLE) { + conn_handle_tx_ecn(conn, pi, NULL, pktns, &hd, ts); + } + + if (pktns->rtb.probe_pkt_left && + (rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING)) { + --pktns->rtb.probe_pkt_left; + } + + conn_update_keep_alive_last_ts(conn, ts); + + conn->dcid.current.bytes_sent += (uint64_t)spktlen; + + conn->tx.pacing.pktlen += (size_t)spktlen; + + ngtcp2_qlog_metrics_updated(&conn->qlog, &conn->cstat); + + ++pktns->tx.last_pkt_num; + + return spktlen; +} + +/* + * conn_write_ack_pkt writes QUIC packet for type |type| which only + * includes ACK frame in the buffer pointed by |dest| whose length is + * |destlen|. + * + * This function returns the number of bytes written in |dest| if it + * succeeds, or one of the following negative error codes: + * + * NGTCP2_ERR_CALLBACK_FAILURE + * User-defined callback function failed. + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +static ngtcp2_ssize conn_write_ack_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, + uint8_t *dest, size_t destlen, + uint8_t type, ngtcp2_tstamp ts) { + int rv; + ngtcp2_frame *ackfr; + ngtcp2_pktns *pktns; + ngtcp2_duration ack_delay; + uint64_t ack_delay_exponent; + ngtcp2_ssize spktlen; + + assert(!(conn->flags & NGTCP2_CONN_FLAG_PPE_PENDING)); + + switch (type) { + case NGTCP2_PKT_INITIAL: + assert(conn->server); + pktns = conn->in_pktns; + ack_delay = 0; + ack_delay_exponent = NGTCP2_DEFAULT_ACK_DELAY_EXPONENT; + break; + case NGTCP2_PKT_HANDSHAKE: + pktns = conn->hs_pktns; + ack_delay = 0; + ack_delay_exponent = NGTCP2_DEFAULT_ACK_DELAY_EXPONENT; + break; + case NGTCP2_PKT_1RTT: + pktns = &conn->pktns; + ack_delay = conn_compute_ack_delay(conn); + ack_delay_exponent = conn->local.transport_params.ack_delay_exponent; + break; + default: + ngtcp2_unreachable(); + } + + if (!pktns->crypto.tx.ckm) { + return 0; + } + + ackfr = NULL; + rv = conn_create_ack_frame(conn, &ackfr, pktns, type, ts, ack_delay, + ack_delay_exponent); + if (rv != 0) { + return rv; + } + + if (!ackfr) { + return 0; + } + + spktlen = ngtcp2_conn_write_single_frame_pkt( + conn, pi, dest, destlen, type, NGTCP2_WRITE_PKT_FLAG_NONE, + &conn->dcid.current.cid, ackfr, NGTCP2_RTB_ENTRY_FLAG_NONE, NULL, ts); + + if (spktlen <= 0) { + return spktlen; + } + + conn->dcid.current.bytes_sent += (uint64_t)spktlen; + + return spktlen; +} + +static void conn_discard_pktns(ngtcp2_conn *conn, ngtcp2_pktns **ppktns, + ngtcp2_tstamp ts) { + ngtcp2_pktns *pktns = *ppktns; + uint64_t bytes_in_flight; + + bytes_in_flight = pktns->rtb.cc_bytes_in_flight; + + assert(conn->cstat.bytes_in_flight >= bytes_in_flight); + + conn->cstat.bytes_in_flight -= bytes_in_flight; + conn->cstat.pto_count = 0; + conn->cstat.last_tx_pkt_ts[pktns->rtb.pktns_id] = UINT64_MAX; + conn->cstat.loss_time[pktns->rtb.pktns_id] = UINT64_MAX; + + conn_call_delete_crypto_aead_ctx(conn, &pktns->crypto.rx.ckm->aead_ctx); + conn_call_delete_crypto_cipher_ctx(conn, &pktns->crypto.rx.hp_ctx); + conn_call_delete_crypto_aead_ctx(conn, &pktns->crypto.tx.ckm->aead_ctx); + conn_call_delete_crypto_cipher_ctx(conn, &pktns->crypto.tx.hp_ctx); + + pktns_del(pktns, conn->mem); + *ppktns = NULL; + + ngtcp2_conn_set_loss_detection_timer(conn, ts); +} + +/* + * conn_discard_initial_state discards state for Initial packet number + * space. + */ +static void conn_discard_initial_state(ngtcp2_conn *conn, ngtcp2_tstamp ts) { + if (!conn->in_pktns) { + return; + } + + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, + "discarding Initial packet number space"); + + conn_discard_pktns(conn, &conn->in_pktns, ts); + + conn_vneg_crypto_free(conn); + + memset(&conn->vneg.rx, 0, sizeof(conn->vneg.rx)); + memset(&conn->vneg.tx, 0, sizeof(conn->vneg.tx)); +} + +/* + * conn_discard_handshake_state discards state for Handshake packet + * number space. + */ +static void conn_discard_handshake_state(ngtcp2_conn *conn, ngtcp2_tstamp ts) { + if (!conn->hs_pktns) { + return; + } + + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, + "discarding Handshake packet number space"); + + conn_discard_pktns(conn, &conn->hs_pktns, ts); +} + +/* + * conn_discard_early_key discards early key. + */ +static void conn_discard_early_key(ngtcp2_conn *conn) { + assert(conn->early.ckm); + + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, "discarding early key"); + + conn_call_delete_crypto_aead_ctx(conn, &conn->early.ckm->aead_ctx); + conn_call_delete_crypto_cipher_ctx(conn, &conn->early.hp_ctx); + memset(&conn->early.hp_ctx, 0, sizeof(conn->early.hp_ctx)); + + ngtcp2_crypto_km_del(conn->early.ckm, conn->mem); + conn->early.ckm = NULL; +} + +/* + * conn_write_handshake_ack_pkts writes packets which contain ACK + * frame only. This function writes at most 2 packets for each + * Initial and Handshake packet. + */ +static ngtcp2_ssize conn_write_handshake_ack_pkts(ngtcp2_conn *conn, + ngtcp2_pkt_info *pi, + uint8_t *dest, size_t destlen, + ngtcp2_tstamp ts) { + ngtcp2_ssize res = 0, nwrite = 0; + + /* In the most cases, client sends ACK in conn_write_handshake_pkt. + This function is only called when it is CWND limited or pacing + limited. It is not required for client to send ACK for server + Initial. This is because once it gets server Initial, it gets + Handshake tx key and discards Initial key. The only good reason + to send ACK is give server RTT measurement early. */ + if (conn->server && conn->in_pktns) { + nwrite = + conn_write_ack_pkt(conn, pi, dest, destlen, NGTCP2_PKT_INITIAL, ts); + if (nwrite < 0) { + assert(nwrite != NGTCP2_ERR_NOBUF); + return nwrite; + } + + res += nwrite; + dest += nwrite; + destlen -= (size_t)nwrite; + } + + if (conn->hs_pktns->crypto.tx.ckm) { + nwrite = + conn_write_ack_pkt(conn, pi, dest, destlen, NGTCP2_PKT_HANDSHAKE, ts); + if (nwrite < 0) { + assert(nwrite != NGTCP2_ERR_NOBUF); + return nwrite; + } + + res += nwrite; + + if (!conn->server && nwrite) { + conn_discard_initial_state(conn, ts); + } + } + + return res; +} + +/* + * conn_write_client_initial writes Initial packet in the buffer + * pointed by |dest| whose length is |destlen|. + * + * This function returns the number of bytes written in |dest| if it + * succeeds, or one of the following negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + * NGTCP2_ERR_CALLBACK_FAILURE + * User-defined callback function failed. + */ +static ngtcp2_ssize conn_write_client_initial(ngtcp2_conn *conn, + ngtcp2_pkt_info *pi, + uint8_t *dest, size_t destlen, + uint64_t early_datalen, + ngtcp2_tstamp ts) { + int rv; + + rv = conn_call_client_initial(conn); + if (rv != 0) { + return rv; + } + + return conn_write_handshake_pkt(conn, pi, dest, destlen, NGTCP2_PKT_INITIAL, + NGTCP2_WRITE_PKT_FLAG_NONE, early_datalen, + ts); +} + +/* + * dcid_tx_left returns the maximum number of bytes that server is + * allowed to send to an unvalidated path associated to |dcid|. + */ +static uint64_t dcid_tx_left(ngtcp2_dcid *dcid) { + if (dcid->flags & NGTCP2_DCID_FLAG_PATH_VALIDATED) { + return SIZE_MAX; + } + /* From QUIC spec: Prior to validating the client address, servers + MUST NOT send more than three times as many bytes as the number + of bytes they have received. */ + assert(dcid->bytes_recv * 3 >= dcid->bytes_sent); + + return dcid->bytes_recv * 3 - dcid->bytes_sent; +} + +/* + * conn_server_tx_left returns the maximum number of bytes that server + * is allowed to send to an unvalidated path. + */ +static uint64_t conn_server_tx_left(ngtcp2_conn *conn, ngtcp2_dcid *dcid) { + assert(conn->server); + + /* If pv->dcid has the current path, use conn->dcid.current. This + is because conn->dcid.current gets update for bytes_recv and + bytes_sent. */ + if (ngtcp2_path_eq(&dcid->ps.path, &conn->dcid.current.ps.path)) { + return dcid_tx_left(&conn->dcid.current); + } + + return dcid_tx_left(dcid); +} + +/* + * conn_write_handshake_pkts writes Initial and Handshake packets in + * the buffer pointed by |dest| whose length is |destlen|. + * + * This function returns the number of bytes written in |dest| if it + * succeeds, or one of the following negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + * NGTCP2_ERR_CALLBACK_FAILURE + * User-defined callback function failed. + */ +static ngtcp2_ssize conn_write_handshake_pkts(ngtcp2_conn *conn, + ngtcp2_pkt_info *pi, + uint8_t *dest, size_t destlen, + uint64_t write_datalen, + ngtcp2_tstamp ts) { + ngtcp2_ssize nwrite; + ngtcp2_ssize res = 0; + ngtcp2_rtb_entry *rtbent; + uint8_t wflags = NGTCP2_WRITE_PKT_FLAG_NONE; + ngtcp2_conn_stat *cstat = &conn->cstat; + ngtcp2_ksl_it it; + + /* As a client, we would like to discard Initial packet number space + when sending the first Handshake packet. When sending Handshake + packet, it should be one of 1) sending ACK, 2) sending PTO probe + packet, or 3) sending CRYPTO. If we have pending acknowledgement + for Initial, then do not discard Initial packet number space. + Otherwise, if either 1) or 2) is satisfied, discard Initial + packet number space. When sending Handshake CRYPTO, it indicates + that client has received Handshake CRYPTO from server. Initial + packet number space is discarded because 1) is met. If there is + pending Initial ACK, Initial packet number space is discarded + after writing the first Handshake packet. + */ + if (!conn->server && conn->hs_pktns->crypto.tx.ckm && conn->in_pktns && + !ngtcp2_acktr_require_active_ack(&conn->in_pktns->acktr, + /* max_ack_delay = */ 0, ts) && + (ngtcp2_acktr_require_active_ack(&conn->hs_pktns->acktr, + /* max_ack_delay = */ 0, ts) || + conn->hs_pktns->rtb.probe_pkt_left)) { + /* Discard Initial state here so that Handshake packet is not + padded. */ + conn_discard_initial_state(conn, ts); + } else if (conn->in_pktns) { + nwrite = + conn_write_handshake_pkt(conn, pi, dest, destlen, NGTCP2_PKT_INITIAL, + NGTCP2_WRITE_PKT_FLAG_NONE, write_datalen, ts); + if (nwrite < 0) { + assert(nwrite != NGTCP2_ERR_NOBUF); + return nwrite; + } + + if (nwrite == 0) { + if (conn->server && (conn->in_pktns->rtb.probe_pkt_left || + ngtcp2_ksl_len(&conn->in_pktns->crypto.tx.frq))) { + if (cstat->loss_detection_timer != UINT64_MAX && + conn_server_tx_left(conn, &conn->dcid.current) < + NGTCP2_MAX_UDP_PAYLOAD_SIZE) { + ngtcp2_log_info( + &conn->log, NGTCP2_LOG_EVENT_RCV, + "loss detection timer canceled due to amplification limit"); + cstat->loss_detection_timer = UINT64_MAX; + } + + return 0; + } + } else { + res += nwrite; + dest += nwrite; + destlen -= (size_t)nwrite; + + if (destlen) { + /* We might have already added padding to Initial, but in that + case, we should have destlen == 0 and no Handshake packet + will be written. */ + if (conn->server) { + it = ngtcp2_rtb_head(&conn->in_pktns->rtb); + if (!ngtcp2_ksl_it_end(&it)) { + rtbent = ngtcp2_ksl_it_get(&it); + if (rtbent->flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) { + wflags |= NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING; + } + } + } else { + wflags |= NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING; + } + } + } + } + + nwrite = conn_write_handshake_pkt( + conn, pi, dest, destlen, NGTCP2_PKT_HANDSHAKE, wflags, write_datalen, ts); + if (nwrite < 0) { + assert(nwrite != NGTCP2_ERR_NOBUF); + return nwrite; + } + + res += nwrite; + + if (!conn->server && conn->hs_pktns->crypto.tx.ckm && nwrite) { + /* We don't need to send further Initial packet if we have + Handshake key and sent something with it. So discard initial + state here. */ + conn_discard_initial_state(conn, ts); + } + + return res; +} + +/* + * conn_initial_stream_rx_offset returns the initial maximum offset of + * data for a stream denoted by |stream_id|. + */ +static uint64_t conn_initial_stream_rx_offset(ngtcp2_conn *conn, + int64_t stream_id) { + int local_stream = conn_local_stream(conn, stream_id); + + if (bidi_stream(stream_id)) { + if (local_stream) { + return conn->local.transport_params.initial_max_stream_data_bidi_local; + } + return conn->local.transport_params.initial_max_stream_data_bidi_remote; + } + + if (local_stream) { + return 0; + } + return conn->local.transport_params.initial_max_stream_data_uni; +} + +/* + * conn_should_send_max_stream_data returns nonzero if MAX_STREAM_DATA + * frame should be send for |strm|. + */ +static int conn_should_send_max_stream_data(ngtcp2_conn *conn, + ngtcp2_strm *strm) { + uint64_t inc = strm->rx.unsent_max_offset - strm->rx.max_offset; + (void)conn; + + return strm->rx.window < 2 * inc; +} + +/* + * conn_should_send_max_data returns nonzero if MAX_DATA frame should + * be sent. + */ +static int conn_should_send_max_data(ngtcp2_conn *conn) { + uint64_t inc = conn->rx.unsent_max_offset - conn->rx.max_offset; + + return conn->rx.window < 2 * inc; +} + +/* + * conn_required_num_new_connection_id returns the number of + * additional connection ID the local endpoint has to provide to the + * remote endpoint. + */ +static size_t conn_required_num_new_connection_id(ngtcp2_conn *conn) { + uint64_t n; + size_t len = ngtcp2_ksl_len(&conn->scid.set); + + if (len >= NGTCP2_MAX_SCID_POOL_SIZE) { + return 0; + } + + assert(conn->remote.transport_params); + assert(conn->remote.transport_params->active_connection_id_limit); + + /* len includes retired CID. We don't provide extra CID if doing so + exceeds NGTCP2_MAX_SCID_POOL_SIZE. */ + + n = conn->remote.transport_params->active_connection_id_limit + + conn->scid.num_retired; + + return (size_t)ngtcp2_min(NGTCP2_MAX_SCID_POOL_SIZE, n) - len; +} + +/* + * conn_enqueue_new_connection_id generates additional connection IDs + * and prepares to send them to the remote endpoint. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + * NGTCP2_ERR_CALLBACK_FAILURE + * User-defined callback function failed. + */ +static int conn_enqueue_new_connection_id(ngtcp2_conn *conn) { + size_t i, need = conn_required_num_new_connection_id(conn); + size_t cidlen = conn->oscid.datalen; + ngtcp2_cid cid; + uint64_t seq; + int rv; + uint8_t token[NGTCP2_STATELESS_RESET_TOKENLEN]; + ngtcp2_frame_chain *nfrc; + ngtcp2_pktns *pktns = &conn->pktns; + ngtcp2_scid *scid; + ngtcp2_ksl_it it; + + for (i = 0; i < need; ++i) { + rv = conn_call_get_new_connection_id(conn, &cid, token, cidlen); + if (rv != 0) { + return rv; + } + + if (cid.datalen != cidlen) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + /* Assert uniqueness */ + it = ngtcp2_ksl_lower_bound(&conn->scid.set, &cid); + if (!ngtcp2_ksl_it_end(&it) && + ngtcp2_cid_eq(ngtcp2_ksl_it_key(&it), &cid)) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + seq = ++conn->scid.last_seq; + + scid = ngtcp2_mem_malloc(conn->mem, sizeof(*scid)); + if (scid == NULL) { + return NGTCP2_ERR_NOMEM; + } + + ngtcp2_scid_init(scid, seq, &cid); + + rv = ngtcp2_ksl_insert(&conn->scid.set, NULL, &scid->cid, scid); + if (rv != 0) { + ngtcp2_mem_free(conn->mem, scid); + return rv; + } + + rv = ngtcp2_frame_chain_objalloc_new(&nfrc, &conn->frc_objalloc); + if (rv != 0) { + return rv; + } + + nfrc->fr.type = NGTCP2_FRAME_NEW_CONNECTION_ID; + nfrc->fr.new_connection_id.seq = seq; + nfrc->fr.new_connection_id.retire_prior_to = 0; + nfrc->fr.new_connection_id.cid = cid; + memcpy(nfrc->fr.new_connection_id.stateless_reset_token, token, + sizeof(token)); + nfrc->next = pktns->tx.frq; + pktns->tx.frq = nfrc; + } + + return 0; +} + +/* + * conn_remove_retired_connection_id removes the already retired + * connection ID. It waits PTO before actually removing a connection + * ID after it receives RETIRE_CONNECTION_ID from peer to catch + * reordered packets. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + * NGTCP2_ERR_CALLBACK_FAILURE + * User-defined callback function failed. + */ +static int conn_remove_retired_connection_id(ngtcp2_conn *conn, + ngtcp2_duration pto, + ngtcp2_tstamp ts) { + ngtcp2_duration timeout = pto; + ngtcp2_scid *scid; + ngtcp2_dcid *dcid; + int rv; + + for (; !ngtcp2_pq_empty(&conn->scid.used);) { + scid = ngtcp2_struct_of(ngtcp2_pq_top(&conn->scid.used), ngtcp2_scid, pe); + + if (scid->retired_ts == UINT64_MAX || scid->retired_ts + timeout >= ts) { + break; + } + + assert(scid->flags & NGTCP2_SCID_FLAG_RETIRED); + + rv = conn_call_remove_connection_id(conn, &scid->cid); + if (rv != 0) { + return rv; + } + + ngtcp2_ksl_remove(&conn->scid.set, NULL, &scid->cid); + ngtcp2_pq_pop(&conn->scid.used); + ngtcp2_mem_free(conn->mem, scid); + + assert(conn->scid.num_retired); + --conn->scid.num_retired; + } + + for (; ngtcp2_ringbuf_len(&conn->dcid.retired.rb);) { + dcid = ngtcp2_ringbuf_get(&conn->dcid.retired.rb, 0); + if (dcid->retired_ts + timeout >= ts) { + break; + } + + rv = conn_call_deactivate_dcid(conn, dcid); + if (rv != 0) { + return rv; + } + + ngtcp2_ringbuf_pop_front(&conn->dcid.retired.rb); + } + + return 0; +} + +/* + * conn_min_short_pktlen returns the minimum length of Short packet + * this endpoint sends. + */ +static size_t conn_min_short_pktlen(ngtcp2_conn *conn) { + return conn->dcid.current.cid.datalen + NGTCP2_MIN_PKT_EXPANDLEN; +} + +/* + * conn_handle_unconfirmed_key_update_from_remote deals with key + * update which has not been confirmed yet and initiated by the remote + * endpoint. + * + * If key update was initiated by the remote endpoint, acknowledging a + * packet encrypted with the new key completes key update procedure. + */ +static void conn_handle_unconfirmed_key_update_from_remote(ngtcp2_conn *conn, + int64_t largest_ack, + ngtcp2_tstamp ts) { + if (!(conn->flags & NGTCP2_CONN_FLAG_KEY_UPDATE_NOT_CONFIRMED) || + (conn->flags & NGTCP2_CONN_FLAG_KEY_UPDATE_INITIATOR) || + largest_ack < conn->pktns.crypto.rx.ckm->pkt_num) { + return; + } + + conn->flags &= (uint32_t)~NGTCP2_CONN_FLAG_KEY_UPDATE_NOT_CONFIRMED; + conn->crypto.key_update.confirmed_ts = ts; + + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CRY, "key update confirmed"); +} + +/* + * conn_write_pkt writes a protected packet in the buffer pointed by + * |dest| whose length if |destlen|. |type| specifies the type of + * packet. It can be NGTCP2_PKT_1RTT or NGTCP2_PKT_0RTT. + * + * This function can send new stream data. In order to send stream + * data, specify the underlying stream and parameters to + * |vmsg|->stream. If |vmsg|->stream.fin is set to nonzero, it + * signals that the given data is the final portion of the stream. + * |vmsg|->stream.data vector of length |vmsg|->stream.datacnt + * specifies stream data to send. The number of bytes sent to the + * stream is assigned to *|vmsg|->stream.pdatalen. If 0 length STREAM + * data is sent, 0 is assigned to it. The caller should initialize + * *|vmsg|->stream.pdatalen to -1. + * + * If |require_padding| is nonzero, padding bytes are added to occupy + * the remaining packet payload. + * + * This function returns the number of bytes written in |dest| if it + * succeeds, or one of the following negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + * NGTCP2_ERR_CALLBACK_FAILURE + * User-defined callback function failed. + * NGTCP2_ERR_STREAM_DATA_BLOCKED + * Stream data could not be written because of flow control. + */ +static ngtcp2_ssize conn_write_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, + uint8_t *dest, size_t destlen, + ngtcp2_vmsg *vmsg, uint8_t type, + uint8_t flags, ngtcp2_tstamp ts) { + int rv = 0; + ngtcp2_crypto_cc *cc = &conn->pkt.cc; + ngtcp2_ppe *ppe = &conn->pkt.ppe; + ngtcp2_pkt_hd *hd = &conn->pkt.hd; + ngtcp2_frame *ackfr = NULL, lfr; + ngtcp2_ssize nwrite; + ngtcp2_frame_chain **pfrc, *nfrc, *frc; + ngtcp2_rtb_entry *ent; + ngtcp2_strm *strm; + int pkt_empty = 1; + uint64_t ndatalen = 0; + int send_stream = 0; + int stream_blocked = 0; + int send_datagram = 0; + ngtcp2_pktns *pktns = &conn->pktns; + size_t left; + uint64_t datalen = 0; + ngtcp2_vec data[NGTCP2_MAX_STREAM_DATACNT]; + size_t datacnt; + uint16_t rtb_entry_flags = NGTCP2_RTB_ENTRY_FLAG_NONE; + int hd_logged = 0; + ngtcp2_path_challenge_entry *pcent; + uint8_t hd_flags = NGTCP2_PKT_FLAG_NONE; + int require_padding = (flags & NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING) != 0; + int write_more = (flags & NGTCP2_WRITE_PKT_FLAG_MORE) != 0; + int ppe_pending = (conn->flags & NGTCP2_CONN_FLAG_PPE_PENDING) != 0; + size_t min_pktlen = conn_min_short_pktlen(conn); + int padded = 0; + ngtcp2_cc_pkt cc_pkt; + uint64_t crypto_offset; + uint64_t stream_offset; + ngtcp2_ssize num_reclaimed; + int fin; + uint64_t target_max_data; + ngtcp2_conn_stat *cstat = &conn->cstat; + uint64_t delta; + const ngtcp2_cid *scid = NULL; + int keep_alive_expired = 0; + uint32_t version = 0; + + /* Return 0 if destlen is less than minimum packet length which can + trigger Stateless Reset */ + if (destlen < min_pktlen) { + return 0; + } + + if (vmsg) { + switch (vmsg->type) { + case NGTCP2_VMSG_TYPE_STREAM: + datalen = ngtcp2_vec_len(vmsg->stream.data, vmsg->stream.datacnt); + ndatalen = conn_enforce_flow_control(conn, vmsg->stream.strm, datalen); + /* 0 length STREAM frame is allowed */ + if (ndatalen || datalen == 0) { + send_stream = 1; + } else { + stream_blocked = 1; + } + break; + case NGTCP2_VMSG_TYPE_DATAGRAM: + datalen = ngtcp2_vec_len(vmsg->datagram.data, vmsg->datagram.datacnt); + send_datagram = 1; + break; + default: + break; + } + } + + if (!ppe_pending) { + switch (type) { + case NGTCP2_PKT_1RTT: + hd_flags = conn_pkt_flags_short(conn); + scid = NULL; + cc->aead = pktns->crypto.ctx.aead; + cc->hp = pktns->crypto.ctx.hp; + cc->ckm = pktns->crypto.tx.ckm; + cc->hp_ctx = pktns->crypto.tx.hp_ctx; + + assert(conn->negotiated_version); + + version = conn->negotiated_version; + + /* transport parameter is only valid after handshake completion + which means we don't know how many connection ID that remote + peer can accept before handshake completion. */ + if (conn->oscid.datalen && conn_is_handshake_completed(conn)) { + rv = conn_enqueue_new_connection_id(conn); + if (rv != 0) { + return rv; + } + } + + break; + case NGTCP2_PKT_0RTT: + assert(!conn->server); + if (!conn->early.ckm) { + return 0; + } + hd_flags = conn_pkt_flags_long(conn); + scid = &conn->oscid; + cc->aead = conn->early.ctx.aead; + cc->hp = conn->early.ctx.hp; + cc->ckm = conn->early.ckm; + cc->hp_ctx = conn->early.hp_ctx; + version = conn->client_chosen_version; + break; + default: + /* Unreachable */ + ngtcp2_unreachable(); + } + + cc->encrypt = conn->callbacks.encrypt; + cc->hp_mask = conn->callbacks.hp_mask; + + if (conn_should_send_max_data(conn)) { + rv = ngtcp2_frame_chain_objalloc_new(&nfrc, &conn->frc_objalloc); + if (rv != 0) { + return rv; + } + + if (conn->local.settings.max_window && + conn->tx.last_max_data_ts != UINT64_MAX && + ts - conn->tx.last_max_data_ts < + NGTCP2_FLOW_WINDOW_RTT_FACTOR * cstat->smoothed_rtt && + conn->local.settings.max_window > conn->rx.window) { + target_max_data = NGTCP2_FLOW_WINDOW_SCALING_FACTOR * conn->rx.window; + if (target_max_data > conn->local.settings.max_window) { + target_max_data = conn->local.settings.max_window; + } + + delta = target_max_data - conn->rx.window; + if (conn->rx.unsent_max_offset + delta > NGTCP2_MAX_VARINT) { + delta = NGTCP2_MAX_VARINT - conn->rx.unsent_max_offset; + } + + conn->rx.window = target_max_data; + } else { + delta = 0; + } + + conn->tx.last_max_data_ts = ts; + + nfrc->fr.type = NGTCP2_FRAME_MAX_DATA; + nfrc->fr.max_data.max_data = conn->rx.unsent_max_offset + delta; + nfrc->next = pktns->tx.frq; + pktns->tx.frq = nfrc; + + conn->rx.max_offset = conn->rx.unsent_max_offset = + nfrc->fr.max_data.max_data; + } + + ngtcp2_pkt_hd_init(hd, hd_flags, type, &conn->dcid.current.cid, scid, + pktns->tx.last_pkt_num + 1, + pktns_select_pkt_numlen(pktns), version, 0); + + ngtcp2_ppe_init(ppe, dest, destlen, cc); + + rv = ngtcp2_ppe_encode_hd(ppe, hd); + if (rv != 0) { + assert(NGTCP2_ERR_NOBUF == rv); + return 0; + } + + if (!ngtcp2_ppe_ensure_hp_sample(ppe)) { + return 0; + } + + if (ngtcp2_ringbuf_len(&conn->rx.path_challenge.rb)) { + pcent = ngtcp2_ringbuf_get(&conn->rx.path_challenge.rb, 0); + + /* PATH_RESPONSE is bound to the path that the corresponding + PATH_CHALLENGE is received. */ + if (ngtcp2_path_eq(&conn->dcid.current.ps.path, &pcent->ps.path)) { + lfr.type = NGTCP2_FRAME_PATH_RESPONSE; + memcpy(lfr.path_response.data, pcent->data, + sizeof(lfr.path_response.data)); + + rv = conn_ppe_write_frame_hd_log(conn, ppe, &hd_logged, hd, &lfr); + if (rv != 0) { + assert(NGTCP2_ERR_NOBUF == rv); + } else { + ngtcp2_ringbuf_pop_front(&conn->rx.path_challenge.rb); + + pkt_empty = 0; + rtb_entry_flags |= NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING; + require_padding = + !conn->server || destlen >= NGTCP2_MAX_UDP_PAYLOAD_SIZE; + /* We don't retransmit PATH_RESPONSE. */ + } + } + } + + rv = conn_create_ack_frame(conn, &ackfr, pktns, type, ts, + conn_compute_ack_delay(conn), + conn->local.transport_params.ack_delay_exponent); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + return rv; + } + + if (ackfr) { + rv = conn_ppe_write_frame_hd_log(conn, ppe, &hd_logged, hd, ackfr); + if (rv != 0) { + assert(NGTCP2_ERR_NOBUF == rv); + } else { + ngtcp2_acktr_commit_ack(&pktns->acktr); + ngtcp2_acktr_add_ack(&pktns->acktr, hd->pkt_num, + ackfr->ack.largest_ack); + if (type == NGTCP2_PKT_1RTT) { + conn_handle_unconfirmed_key_update_from_remote( + conn, ackfr->ack.largest_ack, ts); + } + pkt_empty = 0; + } + } + + build_pkt: + for (pfrc = &pktns->tx.frq; *pfrc;) { + if ((*pfrc)->binder && + ((*pfrc)->binder->flags & NGTCP2_FRAME_CHAIN_BINDER_FLAG_ACK)) { + frc = *pfrc; + *pfrc = (*pfrc)->next; + ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem); + continue; + } + + switch ((*pfrc)->fr.type) { + case NGTCP2_FRAME_STOP_SENDING: + strm = + ngtcp2_conn_find_stream(conn, (*pfrc)->fr.stop_sending.stream_id); + if (strm == NULL || (strm->flags & NGTCP2_STRM_FLAG_SHUT_RD)) { + frc = *pfrc; + *pfrc = (*pfrc)->next; + ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem); + continue; + } + + if (!(strm->flags & NGTCP2_STRM_FLAG_STREAM_STOP_SENDING_CALLED)) { + strm->flags |= NGTCP2_STRM_FLAG_STREAM_STOP_SENDING_CALLED; + + rv = conn_call_stream_stop_sending( + conn, (*pfrc)->fr.stop_sending.stream_id, + (*pfrc)->fr.stop_sending.app_error_code, strm->stream_user_data); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + return rv; + } + } + + break; + case NGTCP2_FRAME_STREAM: + ngtcp2_unreachable(); + case NGTCP2_FRAME_MAX_STREAMS_BIDI: + if ((*pfrc)->fr.max_streams.max_streams < + conn->remote.bidi.max_streams) { + frc = *pfrc; + *pfrc = (*pfrc)->next; + ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem); + continue; + } + break; + case NGTCP2_FRAME_MAX_STREAMS_UNI: + if ((*pfrc)->fr.max_streams.max_streams < + conn->remote.uni.max_streams) { + frc = *pfrc; + *pfrc = (*pfrc)->next; + ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem); + continue; + } + break; + case NGTCP2_FRAME_MAX_STREAM_DATA: + strm = ngtcp2_conn_find_stream(conn, + (*pfrc)->fr.max_stream_data.stream_id); + if (strm == NULL || (strm->flags & NGTCP2_STRM_FLAG_SHUT_RD) || + (*pfrc)->fr.max_stream_data.max_stream_data < strm->rx.max_offset) { + frc = *pfrc; + *pfrc = (*pfrc)->next; + ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem); + continue; + } + break; + case NGTCP2_FRAME_MAX_DATA: + if ((*pfrc)->fr.max_data.max_data < conn->rx.max_offset) { + frc = *pfrc; + *pfrc = (*pfrc)->next; + ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem); + continue; + } + break; + case NGTCP2_FRAME_CRYPTO: + ngtcp2_unreachable(); + } + + rv = conn_ppe_write_frame_hd_log(conn, ppe, &hd_logged, hd, &(*pfrc)->fr); + if (rv != 0) { + assert(NGTCP2_ERR_NOBUF == rv); + break; + } + + pkt_empty = 0; + rtb_entry_flags |= NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING | + NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING | + NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE; + pfrc = &(*pfrc)->next; + } + + if (rv != NGTCP2_ERR_NOBUF) { + for (; ngtcp2_ksl_len(&pktns->crypto.tx.frq);) { + left = ngtcp2_ppe_left(ppe); + + crypto_offset = conn_cryptofrq_unacked_offset(conn, pktns); + if (crypto_offset == (size_t)-1) { + conn_cryptofrq_clear(conn, pktns); + break; + } + + left = ngtcp2_pkt_crypto_max_datalen(crypto_offset, left, left); + + if (left == (size_t)-1) { + break; + } + + rv = conn_cryptofrq_pop(conn, &nfrc, pktns, left); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + return rv; + } + + if (nfrc == NULL) { + break; + } + + rv = conn_ppe_write_frame_hd_log(conn, ppe, &hd_logged, hd, &nfrc->fr); + if (rv != 0) { + ngtcp2_unreachable(); + } + + *pfrc = nfrc; + pfrc = &(*pfrc)->next; + + pkt_empty = 0; + rtb_entry_flags |= NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING | + NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING | + NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE; + } + } + + /* Write MAX_STREAM_ID after RESET_STREAM so that we can extend stream + ID space in one packet. */ + if (rv != NGTCP2_ERR_NOBUF && *pfrc == NULL && + conn->remote.bidi.unsent_max_streams > conn->remote.bidi.max_streams) { + rv = conn_call_extend_max_remote_streams_bidi( + conn, conn->remote.bidi.unsent_max_streams); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + return rv; + } + + rv = ngtcp2_frame_chain_objalloc_new(&nfrc, &conn->frc_objalloc); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + return rv; + } + nfrc->fr.type = NGTCP2_FRAME_MAX_STREAMS_BIDI; + nfrc->fr.max_streams.max_streams = conn->remote.bidi.unsent_max_streams; + *pfrc = nfrc; + + conn->remote.bidi.max_streams = conn->remote.bidi.unsent_max_streams; + + rv = conn_ppe_write_frame_hd_log(conn, ppe, &hd_logged, hd, &(*pfrc)->fr); + if (rv != 0) { + assert(NGTCP2_ERR_NOBUF == rv); + } else { + pkt_empty = 0; + rtb_entry_flags |= NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING | + NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING | + NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE; + pfrc = &(*pfrc)->next; + } + } + + if (rv != NGTCP2_ERR_NOBUF && *pfrc == NULL) { + if (conn->remote.uni.unsent_max_streams > conn->remote.uni.max_streams) { + rv = conn_call_extend_max_remote_streams_uni( + conn, conn->remote.uni.unsent_max_streams); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + return rv; + } + + rv = ngtcp2_frame_chain_objalloc_new(&nfrc, &conn->frc_objalloc); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + return rv; + } + nfrc->fr.type = NGTCP2_FRAME_MAX_STREAMS_UNI; + nfrc->fr.max_streams.max_streams = conn->remote.uni.unsent_max_streams; + *pfrc = nfrc; + + conn->remote.uni.max_streams = conn->remote.uni.unsent_max_streams; + + rv = conn_ppe_write_frame_hd_log(conn, ppe, &hd_logged, hd, + &(*pfrc)->fr); + if (rv != 0) { + assert(NGTCP2_ERR_NOBUF == rv); + } else { + pkt_empty = 0; + rtb_entry_flags |= NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING | + NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING | + NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE; + pfrc = &(*pfrc)->next; + } + } + } + + if (rv != NGTCP2_ERR_NOBUF) { + for (; !ngtcp2_pq_empty(&conn->tx.strmq);) { + strm = ngtcp2_conn_tx_strmq_top(conn); + + if (!(strm->flags & NGTCP2_STRM_FLAG_SHUT_RD) && + conn_should_send_max_stream_data(conn, strm)) { + rv = ngtcp2_frame_chain_objalloc_new(&nfrc, &conn->frc_objalloc); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + return rv; + } + + if (conn->local.settings.max_stream_window && + strm->tx.last_max_stream_data_ts != UINT64_MAX && + ts - strm->tx.last_max_stream_data_ts < + NGTCP2_FLOW_WINDOW_RTT_FACTOR * cstat->smoothed_rtt && + conn->local.settings.max_stream_window > strm->rx.window) { + target_max_data = + NGTCP2_FLOW_WINDOW_SCALING_FACTOR * strm->rx.window; + if (target_max_data > conn->local.settings.max_stream_window) { + target_max_data = conn->local.settings.max_stream_window; + } + + delta = target_max_data - strm->rx.window; + if (strm->rx.unsent_max_offset + delta > NGTCP2_MAX_VARINT) { + delta = NGTCP2_MAX_VARINT - strm->rx.unsent_max_offset; + } + + strm->rx.window = target_max_data; + } else { + delta = 0; + } + + strm->tx.last_max_stream_data_ts = ts; + + nfrc->fr.type = NGTCP2_FRAME_MAX_STREAM_DATA; + nfrc->fr.max_stream_data.stream_id = strm->stream_id; + nfrc->fr.max_stream_data.max_stream_data = + strm->rx.unsent_max_offset + delta; + ngtcp2_list_insert(nfrc, pfrc); + + rv = + conn_ppe_write_frame_hd_log(conn, ppe, &hd_logged, hd, &nfrc->fr); + if (rv != 0) { + assert(NGTCP2_ERR_NOBUF == rv); + break; + } + + pkt_empty = 0; + rtb_entry_flags |= NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING | + NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING | + NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE; + pfrc = &(*pfrc)->next; + strm->rx.max_offset = strm->rx.unsent_max_offset = + nfrc->fr.max_stream_data.max_stream_data; + } + + if (ngtcp2_strm_streamfrq_empty(strm)) { + ngtcp2_conn_tx_strmq_pop(conn); + continue; + } + + stream_offset = ngtcp2_strm_streamfrq_unacked_offset(strm); + if (stream_offset == (uint64_t)-1) { + ngtcp2_strm_streamfrq_clear(strm); + ngtcp2_conn_tx_strmq_pop(conn); + assert(conn->tx.strmq_nretrans); + --conn->tx.strmq_nretrans; + continue; + } + + left = ngtcp2_ppe_left(ppe); + + left = ngtcp2_pkt_stream_max_datalen(strm->stream_id, stream_offset, + left, left); + + if (left == (size_t)-1) { + break; + } + + rv = ngtcp2_strm_streamfrq_pop(strm, &nfrc, left); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + return rv; + } + + if (nfrc == NULL) { + /* TODO Why? */ + break; + } + + rv = conn_ppe_write_frame_hd_log(conn, ppe, &hd_logged, hd, &nfrc->fr); + if (rv != 0) { + ngtcp2_unreachable(); + } + + *pfrc = nfrc; + pfrc = &(*pfrc)->next; + + pkt_empty = 0; + rtb_entry_flags |= NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING | + NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING | + NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE; + + if (ngtcp2_strm_streamfrq_empty(strm)) { + ngtcp2_conn_tx_strmq_pop(conn); + assert(conn->tx.strmq_nretrans); + --conn->tx.strmq_nretrans; + continue; + } + + ngtcp2_conn_tx_strmq_pop(conn); + ++strm->cycle; + rv = ngtcp2_conn_tx_strmq_push(conn, strm); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + return rv; + } + } + } + + if (rv != NGTCP2_ERR_NOBUF && !send_stream && !send_datagram && + !(rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) && + pktns->rtb.num_retransmittable && pktns->tx.frq == NULL && + pktns->rtb.probe_pkt_left) { + num_reclaimed = ngtcp2_rtb_reclaim_on_pto(&pktns->rtb, conn, pktns, 1); + if (num_reclaimed < 0) { + return rv; + } + if (num_reclaimed) { + goto build_pkt; + } + + /* We had pktns->rtb.num_retransmittable > 0 but we were unable + to reclaim any frame. In this case, we do not have to send + any probe packet. */ + if (pktns->rtb.num_pto_eliciting == 0) { + pktns->rtb.probe_pkt_left = 0; + ngtcp2_conn_set_loss_detection_timer(conn, ts); + + if (pkt_empty && conn_cwnd_is_zero(conn) && !require_padding) { + return 0; + } + } + } + } else { + pfrc = conn->pkt.pfrc; + rtb_entry_flags |= conn->pkt.rtb_entry_flags; + pkt_empty = conn->pkt.pkt_empty; + hd_logged = conn->pkt.hd_logged; + } + + left = ngtcp2_ppe_left(ppe); + + if (rv != NGTCP2_ERR_NOBUF && send_stream && *pfrc == NULL && + (ndatalen = ngtcp2_pkt_stream_max_datalen( + vmsg->stream.strm->stream_id, vmsg->stream.strm->tx.offset, ndatalen, + left)) != (size_t)-1 && + (ndatalen || datalen == 0)) { + datacnt = ngtcp2_vec_copy_at_most(data, NGTCP2_MAX_STREAM_DATACNT, + vmsg->stream.data, vmsg->stream.datacnt, + (size_t)ndatalen); + ndatalen = ngtcp2_vec_len(data, datacnt); + + assert((datacnt == 0 && datalen == 0) || (datacnt && datalen)); + + rv = ngtcp2_frame_chain_stream_datacnt_objalloc_new( + &nfrc, datacnt, &conn->frc_objalloc, conn->mem); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + return rv; + } + + nfrc->fr.stream.type = NGTCP2_FRAME_STREAM; + nfrc->fr.stream.flags = 0; + nfrc->fr.stream.stream_id = vmsg->stream.strm->stream_id; + nfrc->fr.stream.offset = vmsg->stream.strm->tx.offset; + nfrc->fr.stream.datacnt = datacnt; + ngtcp2_vec_copy(nfrc->fr.stream.data, data, datacnt); + + fin = (vmsg->stream.flags & NGTCP2_WRITE_STREAM_FLAG_FIN) && + ndatalen == datalen; + nfrc->fr.stream.fin = (uint8_t)fin; + + rv = conn_ppe_write_frame_hd_log(conn, ppe, &hd_logged, hd, &nfrc->fr); + if (rv != 0) { + ngtcp2_unreachable(); + } + + *pfrc = nfrc; + pfrc = &(*pfrc)->next; + + pkt_empty = 0; + rtb_entry_flags |= NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING | + NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING | + NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE; + + vmsg->stream.strm->tx.offset += ndatalen; + conn->tx.offset += ndatalen; + + if (fin) { + ngtcp2_strm_shutdown(vmsg->stream.strm, NGTCP2_STRM_FLAG_SHUT_WR); + } + + if (vmsg->stream.pdatalen) { + *vmsg->stream.pdatalen = (ngtcp2_ssize)ndatalen; + } + } else { + send_stream = 0; + } + + if (rv != NGTCP2_ERR_NOBUF && send_datagram && + left >= ngtcp2_pkt_datagram_framelen((size_t)datalen)) { + if (conn->callbacks.ack_datagram || conn->callbacks.lost_datagram) { + rv = ngtcp2_frame_chain_objalloc_new(&nfrc, &conn->frc_objalloc); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + return rv; + } + + nfrc->fr.datagram.type = NGTCP2_FRAME_DATAGRAM_LEN; + nfrc->fr.datagram.dgram_id = vmsg->datagram.dgram_id; + nfrc->fr.datagram.datacnt = vmsg->datagram.datacnt; + nfrc->fr.datagram.data = (ngtcp2_vec *)vmsg->datagram.data; + + rv = conn_ppe_write_frame_hd_log(conn, ppe, &hd_logged, hd, &nfrc->fr); + assert(rv == 0); + + /* Because DATAGRAM will not be retransmitted, we do not use + data anymore. Just nullify it. The only reason to keep + track a frame is keep dgram_id to pass it to + ngtcp2_ack_datagram or ngtcp2_lost_datagram callbacks. */ + nfrc->fr.datagram.datacnt = 0; + nfrc->fr.datagram.data = NULL; + + *pfrc = nfrc; + pfrc = &(*pfrc)->next; + } else { + lfr.datagram.type = NGTCP2_FRAME_DATAGRAM_LEN; + lfr.datagram.datacnt = vmsg->datagram.datacnt; + lfr.datagram.data = (ngtcp2_vec *)vmsg->datagram.data; + + rv = conn_ppe_write_frame_hd_log(conn, ppe, &hd_logged, hd, &lfr); + assert(rv == 0); + } + + pkt_empty = 0; + rtb_entry_flags |= + NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING | NGTCP2_RTB_ENTRY_FLAG_DATAGRAM; + + if (vmsg->datagram.paccepted) { + *vmsg->datagram.paccepted = 1; + } + } else { + send_datagram = 0; + } + + if (pkt_empty) { + assert(rv == 0 || NGTCP2_ERR_NOBUF == rv); + if (rv == 0 && stream_blocked && ngtcp2_conn_get_max_data_left(conn)) { + return NGTCP2_ERR_STREAM_DATA_BLOCKED; + } + + keep_alive_expired = conn_keep_alive_expired(conn, ts); + + if (conn->pktns.rtb.probe_pkt_left == 0 && !keep_alive_expired && + !require_padding) { + return 0; + } + } else if (write_more) { + conn->pkt.pfrc = pfrc; + conn->pkt.pkt_empty = pkt_empty; + conn->pkt.rtb_entry_flags = rtb_entry_flags; + conn->pkt.hd_logged = hd_logged; + conn->flags |= NGTCP2_CONN_FLAG_PPE_PENDING; + + assert(vmsg); + + switch (vmsg->type) { + case NGTCP2_VMSG_TYPE_STREAM: + if (send_stream) { + if (ngtcp2_ppe_left(ppe)) { + return NGTCP2_ERR_WRITE_MORE; + } + break; + } + + if (ngtcp2_conn_get_max_data_left(conn) && stream_blocked) { + return NGTCP2_ERR_STREAM_DATA_BLOCKED; + } + break; + case NGTCP2_VMSG_TYPE_DATAGRAM: + if (send_datagram && ngtcp2_ppe_left(ppe)) { + return NGTCP2_ERR_WRITE_MORE; + } + /* If DATAGRAM cannot be written due to insufficient space, + continue to create a packet with the hope that application + calls ngtcp2_conn_writev_datagram again. */ + break; + default: + ngtcp2_unreachable(); + } + } + + if (!(rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING)) { + if (pktns->tx.num_non_ack_pkt >= NGTCP2_MAX_NON_ACK_TX_PKT || + keep_alive_expired || conn->pktns.rtb.probe_pkt_left) { + lfr.type = NGTCP2_FRAME_PING; + + rv = conn_ppe_write_frame_hd_log(conn, ppe, &hd_logged, hd, &lfr); + if (rv != 0) { + assert(rv == NGTCP2_ERR_NOBUF); + /* TODO If buffer is too small, PING cannot be written if + packet is still empty. */ + } else { + rtb_entry_flags |= NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING; + if (conn->pktns.rtb.probe_pkt_left) { + rtb_entry_flags |= NGTCP2_RTB_ENTRY_FLAG_PROBE; + } + pktns->tx.num_non_ack_pkt = 0; + } + } else { + ++pktns->tx.num_non_ack_pkt; + } + } else { + pktns->tx.num_non_ack_pkt = 0; + } + + /* TODO Push STREAM frame back to ngtcp2_strm if there is an error + before ngtcp2_rtb_entry is safely created and added. */ + if (require_padding || + /* Making full sized packet will help GSO a bit */ + ngtcp2_ppe_left(ppe) < 10) { + lfr.padding.len = ngtcp2_ppe_padding(ppe); + } else { + lfr.padding.len = ngtcp2_ppe_padding_size(ppe, min_pktlen); + } + + if (lfr.padding.len) { + lfr.type = NGTCP2_FRAME_PADDING; + padded = 1; + ngtcp2_log_tx_fr(&conn->log, hd, &lfr); + ngtcp2_qlog_write_frame(&conn->qlog, &lfr); + } + + nwrite = ngtcp2_ppe_final(ppe, NULL); + if (nwrite < 0) { + assert(ngtcp2_err_is_fatal((int)nwrite)); + return nwrite; + } + + ++cc->ckm->use_count; + + ngtcp2_qlog_pkt_sent_end(&conn->qlog, hd, (size_t)nwrite); + + /* TODO ack-eliciting vs needs-tracking */ + /* probe packet needs tracking but it does not need ACK, could be lost. */ + if ((rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) || padded) { + if (pi) { + conn_handle_tx_ecn(conn, pi, &rtb_entry_flags, pktns, hd, ts); + } + + rv = ngtcp2_rtb_entry_objalloc_new(&ent, hd, NULL, ts, (size_t)nwrite, + rtb_entry_flags, + &conn->rtb_entry_objalloc); + if (rv != 0) { + assert(ngtcp2_err_is_fatal((int)nwrite)); + return rv; + } + + if (*pfrc != pktns->tx.frq) { + ent->frc = pktns->tx.frq; + pktns->tx.frq = *pfrc; + *pfrc = NULL; + } + + if ((rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) && + pktns->rtb.num_ack_eliciting == 0 && conn->cc.event) { + conn->cc.event(&conn->cc, &conn->cstat, NGTCP2_CC_EVENT_TYPE_TX_START, + ts); + } + + rv = conn_on_pkt_sent(conn, &pktns->rtb, ent); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + ngtcp2_rtb_entry_objalloc_del(ent, &conn->rtb_entry_objalloc, + &conn->frc_objalloc, conn->mem); + return rv; + } + + if (rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) { + if (conn->cc.on_pkt_sent) { + conn->cc.on_pkt_sent( + &conn->cc, &conn->cstat, + ngtcp2_cc_pkt_init(&cc_pkt, hd->pkt_num, (size_t)nwrite, + NGTCP2_PKTNS_ID_APPLICATION, ts, ent->rst.lost, + ent->rst.tx_in_flight, ent->rst.is_app_limited)); + } + + if (conn->flags & NGTCP2_CONN_FLAG_RESTART_IDLE_TIMER_ON_WRITE) { + conn_restart_timer_on_write(conn, ts); + } + } + } else if (pi && conn->tx.ecn.state == NGTCP2_ECN_STATE_CAPABLE) { + conn_handle_tx_ecn(conn, pi, NULL, pktns, hd, ts); + } + + conn->flags &= (uint32_t)~NGTCP2_CONN_FLAG_PPE_PENDING; + + if (pktns->rtb.probe_pkt_left && + (rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING)) { + --pktns->rtb.probe_pkt_left; + + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, "probe pkt size=%td", + nwrite); + } + + conn_update_keep_alive_last_ts(conn, ts); + + conn->dcid.current.bytes_sent += (uint64_t)nwrite; + + conn->tx.pacing.pktlen += (size_t)nwrite; + + ngtcp2_qlog_metrics_updated(&conn->qlog, &conn->cstat); + + ++pktns->tx.last_pkt_num; + + return nwrite; +} + +ngtcp2_ssize ngtcp2_conn_write_single_frame_pkt( + ngtcp2_conn *conn, ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, + uint8_t type, uint8_t flags, const ngtcp2_cid *dcid, ngtcp2_frame *fr, + uint16_t rtb_entry_flags, const ngtcp2_path *path, ngtcp2_tstamp ts) { + int rv; + ngtcp2_ppe ppe; + ngtcp2_pkt_hd hd; + ngtcp2_frame lfr; + ngtcp2_ssize nwrite; + ngtcp2_crypto_cc cc; + ngtcp2_pktns *pktns; + uint8_t hd_flags; + ngtcp2_rtb_entry *rtbent; + int padded = 0; + const ngtcp2_cid *scid; + uint32_t version; + + switch (type) { + case NGTCP2_PKT_INITIAL: + pktns = conn->in_pktns; + hd_flags = conn_pkt_flags_long(conn); + scid = &conn->oscid; + version = conn->negotiated_version ? conn->negotiated_version + : conn->client_chosen_version; + if (version == conn->client_chosen_version) { + cc.ckm = pktns->crypto.tx.ckm; + cc.hp_ctx = pktns->crypto.tx.hp_ctx; + } else { + assert(version == conn->vneg.version); + + cc.ckm = conn->vneg.tx.ckm; + cc.hp_ctx = conn->vneg.tx.hp_ctx; + } + break; + case NGTCP2_PKT_HANDSHAKE: + pktns = conn->hs_pktns; + hd_flags = conn_pkt_flags_long(conn); + scid = &conn->oscid; + version = conn->negotiated_version; + cc.ckm = pktns->crypto.tx.ckm; + cc.hp_ctx = pktns->crypto.tx.hp_ctx; + break; + case NGTCP2_PKT_1RTT: + pktns = &conn->pktns; + hd_flags = conn_pkt_flags_short(conn); + scid = NULL; + version = conn->negotiated_version; + cc.ckm = pktns->crypto.tx.ckm; + cc.hp_ctx = pktns->crypto.tx.hp_ctx; + break; + default: + /* We don't support 0-RTT packet in this function. */ + ngtcp2_unreachable(); + } + + cc.aead = pktns->crypto.ctx.aead; + cc.hp = pktns->crypto.ctx.hp; + cc.encrypt = conn->callbacks.encrypt; + cc.hp_mask = conn->callbacks.hp_mask; + + ngtcp2_pkt_hd_init(&hd, hd_flags, type, dcid, scid, + pktns->tx.last_pkt_num + 1, pktns_select_pkt_numlen(pktns), + version, 0); + + ngtcp2_ppe_init(&ppe, dest, destlen, &cc); + + rv = ngtcp2_ppe_encode_hd(&ppe, &hd); + if (rv != 0) { + assert(NGTCP2_ERR_NOBUF == rv); + return 0; + } + + if (!ngtcp2_ppe_ensure_hp_sample(&ppe)) { + return 0; + } + + ngtcp2_log_tx_pkt_hd(&conn->log, &hd); + ngtcp2_qlog_pkt_sent_start(&conn->qlog); + + rv = conn_ppe_write_frame(conn, &ppe, &hd, fr); + if (rv != 0) { + assert(NGTCP2_ERR_NOBUF == rv); + return 0; + } + + lfr.type = NGTCP2_FRAME_PADDING; + if (flags & NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING) { + lfr.padding.len = ngtcp2_ppe_padding(&ppe); + } else { + switch (fr->type) { + case NGTCP2_FRAME_PATH_CHALLENGE: + case NGTCP2_FRAME_PATH_RESPONSE: + if (!conn->server || destlen >= NGTCP2_MAX_UDP_PAYLOAD_SIZE) { + lfr.padding.len = ngtcp2_ppe_padding(&ppe); + } else { + lfr.padding.len = 0; + } + break; + default: + if (type == NGTCP2_PKT_1RTT) { + lfr.padding.len = + ngtcp2_ppe_padding_size(&ppe, conn_min_short_pktlen(conn)); + } else { + lfr.padding.len = ngtcp2_ppe_padding_hp_sample(&ppe); + } + } + } + if (lfr.padding.len) { + padded = 1; + ngtcp2_log_tx_fr(&conn->log, &hd, &lfr); + ngtcp2_qlog_write_frame(&conn->qlog, &lfr); + } + + nwrite = ngtcp2_ppe_final(&ppe, NULL); + if (nwrite < 0) { + return nwrite; + } + + if (type == NGTCP2_PKT_1RTT) { + ++cc.ckm->use_count; + } + + ngtcp2_qlog_pkt_sent_end(&conn->qlog, &hd, (size_t)nwrite); + + /* Do this when we are sure that there is no error. */ + switch (fr->type) { + case NGTCP2_FRAME_ACK: + case NGTCP2_FRAME_ACK_ECN: + ngtcp2_acktr_commit_ack(&pktns->acktr); + ngtcp2_acktr_add_ack(&pktns->acktr, hd.pkt_num, fr->ack.largest_ack); + if (type == NGTCP2_PKT_1RTT) { + conn_handle_unconfirmed_key_update_from_remote(conn, fr->ack.largest_ack, + ts); + } + break; + } + + if (((rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) || padded) && + (!path || ngtcp2_path_eq(&conn->dcid.current.ps.path, path))) { + if (pi && !(rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_PMTUD_PROBE)) { + conn_handle_tx_ecn(conn, pi, &rtb_entry_flags, pktns, &hd, ts); + } + + rv = ngtcp2_rtb_entry_objalloc_new(&rtbent, &hd, NULL, ts, (size_t)nwrite, + rtb_entry_flags, + &conn->rtb_entry_objalloc); + if (rv != 0) { + return rv; + } + + rv = conn_on_pkt_sent(conn, &pktns->rtb, rtbent); + if (rv != 0) { + ngtcp2_rtb_entry_objalloc_del(rtbent, &conn->rtb_entry_objalloc, + &conn->frc_objalloc, conn->mem); + return rv; + } + + if (rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) { + if (conn->flags & NGTCP2_CONN_FLAG_RESTART_IDLE_TIMER_ON_WRITE) { + conn_restart_timer_on_write(conn, ts); + } + + if (pktns->rtb.probe_pkt_left && path && + ngtcp2_path_eq(&conn->dcid.current.ps.path, path)) { + --pktns->rtb.probe_pkt_left; + + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, "probe pkt size=%td", + nwrite); + } + } + } else if (pi && !(rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_PMTUD_PROBE) && + conn->tx.ecn.state == NGTCP2_ECN_STATE_CAPABLE) { + conn_handle_tx_ecn(conn, pi, NULL, pktns, &hd, ts); + } + + if (path && ngtcp2_path_eq(&conn->dcid.current.ps.path, path)) { + conn_update_keep_alive_last_ts(conn, ts); + } + + if (!padded) { + switch (fr->type) { + case NGTCP2_FRAME_ACK: + case NGTCP2_FRAME_ACK_ECN: + break; + default: + conn->tx.pacing.pktlen += (size_t)nwrite; + } + } else { + conn->tx.pacing.pktlen += (size_t)nwrite; + } + + ngtcp2_qlog_metrics_updated(&conn->qlog, &conn->cstat); + + ++pktns->tx.last_pkt_num; + + return nwrite; +} + +/* + * conn_process_early_rtb makes any pending 0RTT packet 1RTT packet. + */ +static void conn_process_early_rtb(ngtcp2_conn *conn) { + ngtcp2_rtb_entry *ent; + ngtcp2_rtb *rtb = &conn->pktns.rtb; + ngtcp2_ksl_it it; + + for (it = ngtcp2_rtb_head(rtb); !ngtcp2_ksl_it_end(&it); + ngtcp2_ksl_it_next(&it)) { + ent = ngtcp2_ksl_it_get(&it); + + if ((ent->hd.flags & NGTCP2_PKT_FLAG_LONG_FORM) == 0 || + ent->hd.type != NGTCP2_PKT_0RTT) { + continue; + } + + /* 0-RTT packet is retransmitted as a 1RTT packet. */ + ent->hd.flags &= (uint8_t)~NGTCP2_PKT_FLAG_LONG_FORM; + ent->hd.type = NGTCP2_PKT_1RTT; + } +} + +/* + * conn_handshake_remnants_left returns nonzero if there may be + * handshake packets the local endpoint has to send, including new + * packets and lost ones. + */ +static int conn_handshake_remnants_left(ngtcp2_conn *conn) { + ngtcp2_pktns *in_pktns = conn->in_pktns; + ngtcp2_pktns *hs_pktns = conn->hs_pktns; + + return !conn_is_handshake_completed(conn) || + (in_pktns && (in_pktns->rtb.num_pto_eliciting || + ngtcp2_ksl_len(&in_pktns->crypto.tx.frq))) || + (hs_pktns && (hs_pktns->rtb.num_pto_eliciting || + ngtcp2_ksl_len(&hs_pktns->crypto.tx.frq))); +} + +/* + * conn_retire_dcid_seq retires destination connection ID denoted by + * |seq|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + * NGTCP2_ERR_CONNECTION_ID_LIMIT + * The number of unacknowledged retirement exceeds the limit. + */ +static int conn_retire_dcid_seq(ngtcp2_conn *conn, uint64_t seq) { + ngtcp2_pktns *pktns = &conn->pktns; + ngtcp2_frame_chain *nfrc; + int rv; + + rv = ngtcp2_conn_track_retired_dcid_seq(conn, seq); + if (rv != 0) { + return rv; + } + + rv = ngtcp2_frame_chain_objalloc_new(&nfrc, &conn->frc_objalloc); + if (rv != 0) { + return rv; + } + + nfrc->fr.type = NGTCP2_FRAME_RETIRE_CONNECTION_ID; + nfrc->fr.retire_connection_id.seq = seq; + nfrc->next = pktns->tx.frq; + pktns->tx.frq = nfrc; + + return 0; +} + +/* + * conn_retire_dcid retires |dcid|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory + */ +static int conn_retire_dcid(ngtcp2_conn *conn, const ngtcp2_dcid *dcid, + ngtcp2_tstamp ts) { + ngtcp2_ringbuf *rb = &conn->dcid.retired.rb; + ngtcp2_dcid *dest, *stale_dcid; + int rv; + + assert(dcid->cid.datalen); + + if (ngtcp2_ringbuf_full(rb)) { + stale_dcid = ngtcp2_ringbuf_get(rb, 0); + rv = conn_call_deactivate_dcid(conn, stale_dcid); + if (rv != 0) { + return rv; + } + + ngtcp2_ringbuf_pop_front(rb); + } + + dest = ngtcp2_ringbuf_push_back(rb); + ngtcp2_dcid_copy(dest, dcid); + dest->retired_ts = ts; + + return conn_retire_dcid_seq(conn, dcid->seq); +} + +/* + * conn_bind_dcid stores the DCID to |*pdcid| bound to |path|. If + * such DCID is not found, bind the new DCID to |path| and stores it + * to |*pdcid|. If a remote endpoint uses zero-length connection ID, + * the pointer to conn->dcid.current is assigned to |*pdcid|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_CONN_ID_BLOCKED + * No unused DCID is available + * NGTCP2_ERR_NOMEM + * Out of memory + */ +static int conn_bind_dcid(ngtcp2_conn *conn, ngtcp2_dcid **pdcid, + const ngtcp2_path *path, ngtcp2_tstamp ts) { + ngtcp2_dcid *dcid, *ndcid; + ngtcp2_cid cid; + size_t i, len; + int rv; + + assert(!ngtcp2_path_eq(&conn->dcid.current.ps.path, path)); + assert(!conn->pv || !ngtcp2_path_eq(&conn->pv->dcid.ps.path, path)); + assert(!conn->pv || !(conn->pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE) || + !ngtcp2_path_eq(&conn->pv->fallback_dcid.ps.path, path)); + + len = ngtcp2_ringbuf_len(&conn->dcid.bound.rb); + for (i = 0; i < len; ++i) { + dcid = ngtcp2_ringbuf_get(&conn->dcid.bound.rb, i); + + if (ngtcp2_path_eq(&dcid->ps.path, path)) { + *pdcid = dcid; + return 0; + } + } + + if (conn->dcid.current.cid.datalen == 0) { + ndcid = ngtcp2_ringbuf_push_back(&conn->dcid.bound.rb); + ngtcp2_cid_zero(&cid); + ngtcp2_dcid_init(ndcid, ++conn->dcid.zerolen_seq, &cid, NULL); + ngtcp2_dcid_set_path(ndcid, path); + + *pdcid = ndcid; + + return 0; + } + + if (ngtcp2_ringbuf_len(&conn->dcid.unused.rb) == 0) { + return NGTCP2_ERR_CONN_ID_BLOCKED; + } + + if (ngtcp2_ringbuf_full(&conn->dcid.bound.rb)) { + dcid = ngtcp2_ringbuf_get(&conn->dcid.bound.rb, 0); + rv = conn_retire_dcid(conn, dcid, ts); + if (rv != 0) { + return rv; + } + } + + dcid = ngtcp2_ringbuf_get(&conn->dcid.unused.rb, 0); + ndcid = ngtcp2_ringbuf_push_back(&conn->dcid.bound.rb); + + ngtcp2_dcid_copy(ndcid, dcid); + ndcid->bound_ts = ts; + ngtcp2_dcid_set_path(ndcid, path); + + ngtcp2_ringbuf_pop_front(&conn->dcid.unused.rb); + + *pdcid = ndcid; + + return 0; +} + +static int conn_start_pmtud(ngtcp2_conn *conn) { + int rv; + size_t hard_max_udp_payload_size; + + assert(!conn->local.settings.no_pmtud); + assert(!conn->pmtud); + assert(conn_is_handshake_completed(conn)); + assert(conn->remote.transport_params); + assert(conn->remote.transport_params->max_udp_payload_size >= + NGTCP2_MAX_UDP_PAYLOAD_SIZE); + + hard_max_udp_payload_size = (size_t)ngtcp2_min( + conn->remote.transport_params->max_udp_payload_size, + (uint64_t)conn->local.settings.max_tx_udp_payload_size); + + rv = ngtcp2_pmtud_new(&conn->pmtud, conn->dcid.current.max_udp_payload_size, + hard_max_udp_payload_size, + conn->pktns.tx.last_pkt_num + 1, conn->mem); + if (rv != 0) { + return rv; + } + + if (ngtcp2_pmtud_finished(conn->pmtud)) { + ngtcp2_conn_stop_pmtud(conn); + } + + return 0; +} + +int ngtcp2_conn_start_pmtud(ngtcp2_conn *conn) { + return conn_start_pmtud(conn); +} + +void ngtcp2_conn_stop_pmtud(ngtcp2_conn *conn) { + if (!conn->pmtud) { + return; + } + + ngtcp2_pmtud_del(conn->pmtud); + + conn->pmtud = NULL; +} + +static ngtcp2_ssize conn_write_pmtud_probe(ngtcp2_conn *conn, + ngtcp2_pkt_info *pi, uint8_t *dest, + size_t destlen, ngtcp2_tstamp ts) { + size_t probelen; + ngtcp2_ssize nwrite; + ngtcp2_frame lfr; + + assert(conn->pmtud); + assert(!ngtcp2_pmtud_finished(conn->pmtud)); + + if (!ngtcp2_pmtud_require_probe(conn->pmtud)) { + return 0; + } + + probelen = ngtcp2_pmtud_probelen(conn->pmtud); + if (probelen > destlen) { + return 0; + } + + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, + "sending PMTUD probe packet len=%zu", probelen); + + lfr.type = NGTCP2_FRAME_PING; + + nwrite = ngtcp2_conn_write_single_frame_pkt( + conn, pi, dest, probelen, NGTCP2_PKT_1RTT, + NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING, &conn->dcid.current.cid, &lfr, + NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING | + NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING | + NGTCP2_RTB_ENTRY_FLAG_PMTUD_PROBE, + NULL, ts); + if (nwrite < 0) { + return nwrite; + } + + assert(nwrite); + + ngtcp2_pmtud_probe_sent(conn->pmtud, conn_compute_pto(conn, &conn->pktns), + ts); + + return nwrite; +} + +/* + * conn_stop_pv stops the path validation which is currently running. + * This function does nothing if no path validation is currently being + * performed. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory + */ +static int conn_stop_pv(ngtcp2_conn *conn, ngtcp2_tstamp ts) { + int rv = 0; + ngtcp2_pv *pv = conn->pv; + + if (pv == NULL) { + return 0; + } + + if (pv->dcid.cid.datalen && pv->dcid.seq != conn->dcid.current.seq) { + rv = conn_retire_dcid(conn, &pv->dcid, ts); + if (rv != 0) { + goto fin; + } + } + + if ((pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE) && + pv->fallback_dcid.cid.datalen && + pv->fallback_dcid.seq != conn->dcid.current.seq && + pv->fallback_dcid.seq != pv->dcid.seq) { + rv = conn_retire_dcid(conn, &pv->fallback_dcid, ts); + if (rv != 0) { + goto fin; + } + } + +fin: + ngtcp2_pv_del(pv); + conn->pv = NULL; + + return rv; +} + +/* + * conn_abort_pv aborts the current path validation and frees + * resources allocated for it. This function assumes that conn->pv is + * not NULL. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory + * NGTCP2_ERR_CALLBACK_FAILURE + * User-defined callback function failed. + */ +static int conn_abort_pv(ngtcp2_conn *conn, ngtcp2_tstamp ts) { + ngtcp2_pv *pv = conn->pv; + int rv; + + assert(pv); + + if (!(pv->flags & NGTCP2_PV_FLAG_DONT_CARE)) { + rv = conn_call_path_validation(conn, pv, + NGTCP2_PATH_VALIDATION_RESULT_ABORTED); + if (rv != 0) { + return rv; + } + } + + return conn_stop_pv(conn, ts); +} + +static size_t conn_shape_udp_payload(ngtcp2_conn *conn, const ngtcp2_dcid *dcid, + size_t payloadlen) { + if (conn->remote.transport_params && + conn->remote.transport_params->max_udp_payload_size) { + assert(conn->remote.transport_params->max_udp_payload_size >= + NGTCP2_MAX_UDP_PAYLOAD_SIZE); + + payloadlen = + (size_t)ngtcp2_min((uint64_t)payloadlen, + conn->remote.transport_params->max_udp_payload_size); + } + + payloadlen = + ngtcp2_min(payloadlen, conn->local.settings.max_tx_udp_payload_size); + + if (conn->local.settings.no_tx_udp_payload_size_shaping) { + return payloadlen; + } + + return ngtcp2_min(payloadlen, dcid->max_udp_payload_size); +} + +static void conn_reset_congestion_state(ngtcp2_conn *conn, ngtcp2_tstamp ts); + +/* + * conn_on_path_validation_failed is called when path validation + * fails. This function may delete |pv|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory + * NGTCP2_ERR_CALLBACK_FAILURE + * User-defined callback function failed. + */ +static int conn_on_path_validation_failed(ngtcp2_conn *conn, ngtcp2_pv *pv, + ngtcp2_tstamp ts) { + int rv; + + if (!(pv->flags & NGTCP2_PV_FLAG_DONT_CARE)) { + rv = conn_call_path_validation(conn, pv, + NGTCP2_PATH_VALIDATION_RESULT_FAILURE); + if (rv != 0) { + return rv; + } + } + + if (pv->flags & NGTCP2_PV_FLAG_MTU_PROBE) { + return NGTCP2_ERR_NO_VIABLE_PATH; + } + + if (pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE) { + ngtcp2_dcid_copy(&conn->dcid.current, &pv->fallback_dcid); + conn_reset_congestion_state(conn, ts); + } + + return conn_stop_pv(conn, ts); +} + +/* + * conn_write_path_challenge writes a packet which includes + * PATH_CHALLENGE frame into |dest| of length |destlen|. + * + * This function returns the number of bytes written to |dest|, or one + * of the following negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory + * NGTCP2_ERR_CALLBACK_FAILURE + * User-defined callback function failed. + */ +static ngtcp2_ssize conn_write_path_challenge(ngtcp2_conn *conn, + ngtcp2_path *path, + ngtcp2_pkt_info *pi, + uint8_t *dest, size_t destlen, + ngtcp2_tstamp ts) { + ngtcp2_ssize nwrite; + ngtcp2_tstamp expiry; + ngtcp2_pv *pv = conn->pv; + ngtcp2_frame lfr; + ngtcp2_duration timeout; + uint8_t flags; + uint64_t tx_left; + int rv; + + if (ngtcp2_pv_validation_timed_out(pv, ts)) { + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PTV, + "path validation was timed out"); + rv = conn_on_path_validation_failed(conn, pv, ts); + if (rv != 0) { + return rv; + } + + /* We might set path to the one which we just failed validate. + Set it to the current path here. */ + if (path) { + ngtcp2_path_copy(path, &conn->dcid.current.ps.path); + } + + return 0; + } + + ngtcp2_pv_handle_entry_expiry(pv, ts); + + if (!ngtcp2_pv_should_send_probe(pv)) { + return 0; + } + + rv = conn_call_get_path_challenge_data(conn, lfr.path_challenge.data); + if (rv != 0) { + return rv; + } + + lfr.type = NGTCP2_FRAME_PATH_CHALLENGE; + + timeout = conn_compute_pto(conn, &conn->pktns); + timeout = ngtcp2_max(timeout, 3 * conn->cstat.initial_rtt); + expiry = ts + timeout * (1ULL << pv->round); + + destlen = ngtcp2_min(destlen, NGTCP2_MAX_UDP_PAYLOAD_SIZE); + + if (conn->server) { + if (!(pv->dcid.flags & NGTCP2_DCID_FLAG_PATH_VALIDATED)) { + tx_left = conn_server_tx_left(conn, &pv->dcid); + destlen = (size_t)ngtcp2_min((uint64_t)destlen, tx_left); + if (destlen == 0) { + return 0; + } + } + + if (destlen < NGTCP2_MAX_UDP_PAYLOAD_SIZE) { + flags = NGTCP2_PV_ENTRY_FLAG_UNDERSIZED; + } else { + flags = NGTCP2_PV_ENTRY_FLAG_NONE; + } + } else { + flags = NGTCP2_PV_ENTRY_FLAG_NONE; + } + + ngtcp2_pv_add_entry(pv, lfr.path_challenge.data, expiry, flags, ts); + + nwrite = ngtcp2_conn_write_single_frame_pkt( + conn, pi, dest, destlen, NGTCP2_PKT_1RTT, NGTCP2_WRITE_PKT_FLAG_NONE, + &pv->dcid.cid, &lfr, + NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING | NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING, + &pv->dcid.ps.path, ts); + if (nwrite <= 0) { + return nwrite; + } + + if (path) { + ngtcp2_path_copy(path, &pv->dcid.ps.path); + } + + if (ngtcp2_path_eq(&pv->dcid.ps.path, &conn->dcid.current.ps.path)) { + conn->dcid.current.bytes_sent += (uint64_t)nwrite; + } else { + pv->dcid.bytes_sent += (uint64_t)nwrite; + } + + return nwrite; +} + +/* + * conn_write_path_response writes a packet which includes + * PATH_RESPONSE frame into |dest| of length |destlen|. + * + * This function returns the number of bytes written to |dest|, or one + * of the following negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory + * NGTCP2_ERR_CALLBACK_FAILURE + * User-defined callback function failed. + */ +static ngtcp2_ssize conn_write_path_response(ngtcp2_conn *conn, + ngtcp2_path *path, + ngtcp2_pkt_info *pi, uint8_t *dest, + size_t destlen, ngtcp2_tstamp ts) { + ngtcp2_pv *pv = conn->pv; + ngtcp2_path_challenge_entry *pcent = NULL; + ngtcp2_dcid *dcid = NULL; + ngtcp2_frame lfr; + ngtcp2_ssize nwrite; + int rv; + uint64_t tx_left; + + for (; ngtcp2_ringbuf_len(&conn->rx.path_challenge.rb);) { + pcent = ngtcp2_ringbuf_get(&conn->rx.path_challenge.rb, 0); + + if (ngtcp2_path_eq(&conn->dcid.current.ps.path, &pcent->ps.path)) { + /* Send PATH_RESPONSE from conn_write_pkt. */ + return 0; + } + + if (pv) { + if (ngtcp2_path_eq(&pv->dcid.ps.path, &pcent->ps.path)) { + dcid = &pv->dcid; + break; + } + if ((pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE) && + ngtcp2_path_eq(&pv->fallback_dcid.ps.path, &pcent->ps.path)) { + dcid = &pv->fallback_dcid; + break; + } + } + + if (conn->server) { + break; + } + + /* Client does not expect to respond to path validation against + unknown path */ + ngtcp2_ringbuf_pop_front(&conn->rx.path_challenge.rb); + pcent = NULL; + } + + if (pcent == NULL) { + return 0; + } + + if (dcid == NULL) { + /* client is expected to have |path| in conn->dcid.current or + conn->pv. */ + assert(conn->server); + + rv = conn_bind_dcid(conn, &dcid, &pcent->ps.path, ts); + if (rv != 0) { + if (ngtcp2_err_is_fatal(rv)) { + return rv; + } + return 0; + } + } + + destlen = ngtcp2_min(destlen, NGTCP2_MAX_UDP_PAYLOAD_SIZE); + + if (conn->server && !(dcid->flags & NGTCP2_DCID_FLAG_PATH_VALIDATED)) { + tx_left = conn_server_tx_left(conn, dcid); + destlen = (size_t)ngtcp2_min((uint64_t)destlen, tx_left); + if (destlen == 0) { + return 0; + } + } + + lfr.type = NGTCP2_FRAME_PATH_RESPONSE; + memcpy(lfr.path_response.data, pcent->data, sizeof(lfr.path_response.data)); + + nwrite = ngtcp2_conn_write_single_frame_pkt( + conn, pi, dest, destlen, NGTCP2_PKT_1RTT, NGTCP2_WRITE_PKT_FLAG_NONE, + &dcid->cid, &lfr, NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING, &pcent->ps.path, + ts); + if (nwrite <= 0) { + return nwrite; + } + + if (path) { + ngtcp2_path_copy(path, &pcent->ps.path); + } + + ngtcp2_ringbuf_pop_front(&conn->rx.path_challenge.rb); + + dcid->bytes_sent += (uint64_t)nwrite; + + return nwrite; +} + +ngtcp2_ssize ngtcp2_conn_write_pkt_versioned(ngtcp2_conn *conn, + ngtcp2_path *path, + int pkt_info_version, + ngtcp2_pkt_info *pi, uint8_t *dest, + size_t destlen, ngtcp2_tstamp ts) { + return ngtcp2_conn_writev_stream_versioned( + conn, path, pkt_info_version, pi, dest, destlen, + /* pdatalen = */ NULL, NGTCP2_WRITE_STREAM_FLAG_NONE, + /* stream_id = */ -1, + /* datav = */ NULL, /* datavcnt = */ 0, ts); +} + +/* + * conn_on_version_negotiation is called when Version Negotiation + * packet is received. The function decodes the data in the buffer + * pointed by |payload| whose length is |payloadlen| as Version + * Negotiation packet payload. The packet header is given in |hd|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + * NGTCP2_ERR_CALLBACK_FAILURE + * User-defined callback function failed. + * NGTCP2_ERR_INVALID_ARGUMENT + * Packet payload is badly formatted. + */ +static int conn_on_version_negotiation(ngtcp2_conn *conn, + const ngtcp2_pkt_hd *hd, + const uint8_t *payload, + size_t payloadlen) { + uint32_t sv[16]; + uint32_t *p; + int rv = 0; + size_t nsv; + size_t i; + + if (payloadlen % sizeof(uint32_t)) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + /* Version Negotiation packet is ignored if client has reacted upon + Version Negotiation packet. */ + if (conn->local.settings.original_version != conn->client_chosen_version) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + if (payloadlen > sizeof(sv)) { + p = ngtcp2_mem_malloc(conn->mem, payloadlen); + if (p == NULL) { + return NGTCP2_ERR_NOMEM; + } + } else { + p = sv; + } + + nsv = ngtcp2_pkt_decode_version_negotiation(p, payload, payloadlen); + + ngtcp2_log_rx_vn(&conn->log, hd, p, nsv); + + ngtcp2_qlog_version_negotiation_pkt_received(&conn->qlog, hd, p, nsv); + + if (!ngtcp2_is_reserved_version(conn->local.settings.original_version)) { + for (i = 0; i < nsv; ++i) { + if (p[i] == conn->local.settings.original_version) { + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "ignore Version Negotiation because it contains the " + "original version"); + + rv = NGTCP2_ERR_INVALID_ARGUMENT; + goto fin; + } + } + } + + rv = conn_call_recv_version_negotiation(conn, hd, p, nsv); + if (rv != 0) { + goto fin; + } + +fin: + if (p != sv) { + ngtcp2_mem_free(conn->mem, p); + } + + return rv; +} + +static uint64_t conn_tx_strmq_first_cycle(ngtcp2_conn *conn) { + ngtcp2_strm *strm; + + if (ngtcp2_pq_empty(&conn->tx.strmq)) { + return 0; + } + + strm = ngtcp2_struct_of(ngtcp2_pq_top(&conn->tx.strmq), ngtcp2_strm, pe); + return strm->cycle; +} + +uint64_t ngtcp2_conn_tx_strmq_first_cycle(ngtcp2_conn *conn) { + ngtcp2_strm *strm; + + if (ngtcp2_pq_empty(&conn->tx.strmq)) { + return 0; + } + + strm = ngtcp2_struct_of(ngtcp2_pq_top(&conn->tx.strmq), ngtcp2_strm, pe); + return strm->cycle; +} + +int ngtcp2_conn_resched_frames(ngtcp2_conn *conn, ngtcp2_pktns *pktns, + ngtcp2_frame_chain **pfrc) { + ngtcp2_frame_chain **first = pfrc; + ngtcp2_frame_chain *frc; + ngtcp2_stream *sfr; + ngtcp2_strm *strm; + int rv; + int streamfrq_empty; + + if (*pfrc == NULL) { + return 0; + } + + for (; *pfrc;) { + switch ((*pfrc)->fr.type) { + case NGTCP2_FRAME_STREAM: + frc = *pfrc; + + *pfrc = frc->next; + frc->next = NULL; + sfr = &frc->fr.stream; + + strm = ngtcp2_conn_find_stream(conn, sfr->stream_id); + if (!strm) { + ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem); + break; + } + streamfrq_empty = ngtcp2_strm_streamfrq_empty(strm); + rv = ngtcp2_strm_streamfrq_push(strm, frc); + if (rv != 0) { + ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem); + return rv; + } + if (!ngtcp2_strm_is_tx_queued(strm)) { + strm->cycle = conn_tx_strmq_first_cycle(conn); + rv = ngtcp2_conn_tx_strmq_push(conn, strm); + if (rv != 0) { + return rv; + } + } + if (streamfrq_empty) { + ++conn->tx.strmq_nretrans; + } + break; + case NGTCP2_FRAME_CRYPTO: + frc = *pfrc; + + *pfrc = frc->next; + frc->next = NULL; + + rv = ngtcp2_ksl_insert(&pktns->crypto.tx.frq, NULL, + &frc->fr.crypto.offset, frc); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem); + return rv; + } + break; + default: + pfrc = &(*pfrc)->next; + } + } + + *pfrc = pktns->tx.frq; + pktns->tx.frq = *first; + + return 0; +} + +/* + * conn_on_retry is called when Retry packet is received. The + * function decodes the data in the buffer pointed by |pkt| whose + * length is |pktlen| as Retry packet. The length of long packet + * header is given in |hdpktlen|. |pkt| includes packet header. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + * NGTCP2_ERR_CALLBACK_FAILURE + * User-defined callback function failed. + * NGTCP2_ERR_INVALID_ARGUMENT + * Packet payload is badly formatted. + * NGTCP2_ERR_PROTO + * ODCID does not match; or Token is empty. + */ +static int conn_on_retry(ngtcp2_conn *conn, const ngtcp2_pkt_hd *hd, + size_t hdpktlen, const uint8_t *pkt, size_t pktlen, + ngtcp2_tstamp ts) { + int rv; + ngtcp2_pkt_retry retry; + ngtcp2_pktns *in_pktns = conn->in_pktns; + ngtcp2_rtb *rtb = &conn->pktns.rtb; + ngtcp2_rtb *in_rtb; + uint8_t cidbuf[sizeof(retry.odcid.data) * 2 + 1]; + uint8_t *token; + + if (!in_pktns || conn->flags & NGTCP2_CONN_FLAG_RECV_RETRY) { + return 0; + } + + in_rtb = &in_pktns->rtb; + + rv = ngtcp2_pkt_decode_retry(&retry, pkt + hdpktlen, pktlen - hdpktlen); + if (rv != 0) { + return rv; + } + + retry.odcid = conn->dcid.current.cid; + + rv = ngtcp2_pkt_verify_retry_tag( + conn->client_chosen_version, &retry, pkt, pktlen, conn->callbacks.encrypt, + &conn->crypto.retry_aead, &conn->crypto.retry_aead_ctx); + if (rv != 0) { + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "unable to verify Retry packet integrity"); + return rv; + } + + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, "odcid=0x%s", + (const char *)ngtcp2_encode_hex(cidbuf, retry.odcid.data, + retry.odcid.datalen)); + + if (retry.tokenlen == 0) { + return NGTCP2_ERR_PROTO; + } + + if (ngtcp2_cid_eq(&conn->dcid.current.cid, &hd->scid)) { + return 0; + } + + ngtcp2_qlog_retry_pkt_received(&conn->qlog, hd, &retry); + + /* DCID must be updated before invoking callback because client + generates new initial keys there. */ + conn->dcid.current.cid = hd->scid; + conn->retry_scid = hd->scid; + + conn->flags |= NGTCP2_CONN_FLAG_RECV_RETRY; + + rv = conn_call_recv_retry(conn, hd); + if (rv != 0) { + return rv; + } + + conn->state = NGTCP2_CS_CLIENT_INITIAL; + + /* Just freeing memory is dangerous because we might free twice. */ + + rv = ngtcp2_rtb_remove_all(rtb, conn, &conn->pktns, &conn->cstat); + if (rv != 0) { + return rv; + } + + rv = ngtcp2_rtb_remove_all(in_rtb, conn, in_pktns, &conn->cstat); + if (rv != 0) { + return rv; + } + + ngtcp2_mem_free(conn->mem, (uint8_t *)conn->local.settings.token); + conn->local.settings.token = NULL; + conn->local.settings.tokenlen = 0; + + token = ngtcp2_mem_malloc(conn->mem, retry.tokenlen); + if (token == NULL) { + return NGTCP2_ERR_NOMEM; + } + + ngtcp2_cpymem(token, retry.token, retry.tokenlen); + + conn->local.settings.token = token; + conn->local.settings.tokenlen = retry.tokenlen; + + reset_conn_stat_recovery(&conn->cstat); + conn_reset_congestion_state(conn, ts); + conn_reset_ecn_validation_state(conn); + + return 0; +} + +int ngtcp2_conn_detect_lost_pkt(ngtcp2_conn *conn, ngtcp2_pktns *pktns, + ngtcp2_conn_stat *cstat, ngtcp2_tstamp ts) { + return ngtcp2_rtb_detect_lost_pkt(&pktns->rtb, conn, pktns, cstat, ts); +} + +/* + * conn_recv_ack processes received ACK frame |fr|. |pkt_ts| is the + * timestamp when packet is received. |ts| should be the current + * time. Usually they are the same, but for buffered packets, + * |pkt_ts| would be earlier than |ts|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory + * NGTCP2_ERR_ACK_FRAME + * ACK frame is malformed. + * NGTCP2_ERR_PROTO + * |fr| acknowledges a packet this endpoint has not sent. + * NGTCP2_ERR_CALLBACK_FAILURE + * User callback failed. + */ +static int conn_recv_ack(ngtcp2_conn *conn, ngtcp2_pktns *pktns, ngtcp2_ack *fr, + ngtcp2_tstamp pkt_ts, ngtcp2_tstamp ts) { + int rv; + ngtcp2_frame_chain *frc = NULL; + ngtcp2_ssize num_acked; + ngtcp2_conn_stat *cstat = &conn->cstat; + + if (pktns->tx.last_pkt_num < fr->largest_ack) { + return NGTCP2_ERR_PROTO; + } + + rv = ngtcp2_pkt_validate_ack(fr); + if (rv != 0) { + return rv; + } + + ngtcp2_acktr_recv_ack(&pktns->acktr, fr); + + num_acked = ngtcp2_rtb_recv_ack(&pktns->rtb, fr, &conn->cstat, conn, pktns, + pkt_ts, ts); + if (num_acked < 0) { + /* TODO assert this */ + assert(ngtcp2_err_is_fatal((int)num_acked)); + ngtcp2_frame_chain_list_objalloc_del(frc, &conn->frc_objalloc, conn->mem); + return (int)num_acked; + } + + if (num_acked == 0) { + return 0; + } + + pktns->rtb.probe_pkt_left = 0; + + if (cstat->pto_count && + (conn->server || (conn->flags & NGTCP2_CONN_FLAG_SERVER_ADDR_VERIFIED))) { + /* Reset PTO count but no less than 2 to avoid frequent probe + packet transmission. */ + cstat->pto_count = ngtcp2_min(cstat->pto_count, 2); + } + + ngtcp2_conn_set_loss_detection_timer(conn, ts); + + return 0; +} + +/* + * conn_assign_recved_ack_delay_unscaled assigns + * fr->ack_delay_unscaled. + */ +static void assign_recved_ack_delay_unscaled(ngtcp2_ack *fr, + uint64_t ack_delay_exponent) { + fr->ack_delay_unscaled = + fr->ack_delay * (1ULL << ack_delay_exponent) * NGTCP2_MICROSECONDS; +} + +/* + * conn_recv_max_stream_data processes received MAX_STREAM_DATA frame + * |fr|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_STREAM_STATE + * Stream ID indicates that it is a local stream, and the local + * endpoint has not initiated it; or stream is peer initiated + * unidirectional stream. + * NGTCP2_ERR_STREAM_LIMIT + * Stream ID exceeds allowed limit. + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +static int conn_recv_max_stream_data(ngtcp2_conn *conn, + const ngtcp2_max_stream_data *fr) { + ngtcp2_strm *strm; + ngtcp2_idtr *idtr; + int local_stream = conn_local_stream(conn, fr->stream_id); + int bidi = bidi_stream(fr->stream_id); + int rv; + + if (bidi) { + if (local_stream) { + if (conn->local.bidi.next_stream_id <= fr->stream_id) { + return NGTCP2_ERR_STREAM_STATE; + } + } else if (conn->remote.bidi.max_streams < + ngtcp2_ord_stream_id(fr->stream_id)) { + return NGTCP2_ERR_STREAM_LIMIT; + } + + idtr = &conn->remote.bidi.idtr; + } else { + if (!local_stream || conn->local.uni.next_stream_id <= fr->stream_id) { + return NGTCP2_ERR_STREAM_STATE; + } + + idtr = &conn->remote.uni.idtr; + } + + strm = ngtcp2_conn_find_stream(conn, fr->stream_id); + if (strm == NULL) { + if (local_stream) { + /* Stream has been closed. */ + return 0; + } + + rv = ngtcp2_idtr_open(idtr, fr->stream_id); + if (rv != 0) { + if (ngtcp2_err_is_fatal(rv)) { + return rv; + } + assert(rv == NGTCP2_ERR_STREAM_IN_USE); + /* Stream has been closed. */ + return 0; + } + + strm = ngtcp2_objalloc_strm_get(&conn->strm_objalloc); + if (strm == NULL) { + return NGTCP2_ERR_NOMEM; + } + rv = ngtcp2_conn_init_stream(conn, strm, fr->stream_id, NULL); + if (rv != 0) { + ngtcp2_objalloc_strm_release(&conn->strm_objalloc, strm); + return rv; + } + + rv = conn_call_stream_open(conn, strm); + if (rv != 0) { + return rv; + } + } + + if (strm->tx.max_offset < fr->max_stream_data) { + strm->tx.max_offset = fr->max_stream_data; + + /* Don't call callback if stream is half-closed local */ + if (strm->flags & NGTCP2_STRM_FLAG_SHUT_WR) { + return 0; + } + + rv = conn_call_extend_max_stream_data(conn, strm, fr->stream_id, + fr->max_stream_data); + if (rv != 0) { + return rv; + } + } + + return 0; +} + +/* + * conn_recv_max_data processes received MAX_DATA frame |fr|. + */ +static void conn_recv_max_data(ngtcp2_conn *conn, const ngtcp2_max_data *fr) { + conn->tx.max_offset = ngtcp2_max(conn->tx.max_offset, fr->max_data); +} + +/* + * conn_buffer_pkt buffers |pkt| of length |pktlen|, chaining it from + * |*ppc|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +static int conn_buffer_pkt(ngtcp2_conn *conn, ngtcp2_pktns *pktns, + const ngtcp2_path *path, const ngtcp2_pkt_info *pi, + const uint8_t *pkt, size_t pktlen, size_t dgramlen, + ngtcp2_tstamp ts) { + int rv; + ngtcp2_pkt_chain **ppc = &pktns->rx.buffed_pkts, *pc; + size_t i; + for (i = 0; *ppc && i < NGTCP2_MAX_NUM_BUFFED_RX_PKTS; + ppc = &(*ppc)->next, ++i) + ; + + if (i == NGTCP2_MAX_NUM_BUFFED_RX_PKTS) { + return 0; + } + + rv = + ngtcp2_pkt_chain_new(&pc, path, pi, pkt, pktlen, dgramlen, ts, conn->mem); + if (rv != 0) { + return rv; + } + + *ppc = pc; + + return 0; +} + +static int ensure_decrypt_buffer(ngtcp2_vec *vec, size_t n, size_t initial, + const ngtcp2_mem *mem) { + uint8_t *nbuf; + size_t len; + + if (vec->len >= n) { + return 0; + } + + len = vec->len == 0 ? initial : vec->len * 2; + for (; len < n; len *= 2) + ; + nbuf = ngtcp2_mem_realloc(mem, vec->base, len); + if (nbuf == NULL) { + return NGTCP2_ERR_NOMEM; + } + vec->base = nbuf; + vec->len = len; + + return 0; +} + +/* + * conn_ensure_decrypt_hp_buffer ensures that + * conn->crypto.decrypt_hp_buf has at least |n| bytes space. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +static int conn_ensure_decrypt_hp_buffer(ngtcp2_conn *conn, size_t n) { + return ensure_decrypt_buffer(&conn->crypto.decrypt_hp_buf, n, 256, conn->mem); +} + +/* + * conn_ensure_decrypt_buffer ensures that conn->crypto.decrypt_buf + * has at least |n| bytes space. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +static int conn_ensure_decrypt_buffer(ngtcp2_conn *conn, size_t n) { + return ensure_decrypt_buffer(&conn->crypto.decrypt_buf, n, 2048, conn->mem); +} + +/* + * decrypt_pkt decrypts the data pointed by |payload| whose length is + * |payloadlen|, and writes plaintext data to the buffer pointed by + * |dest|. The buffer pointed by |aad| is the Additional + * Authenticated Data, and its length is |aadlen|. |pkt_num| is used + * to create a nonce. |ckm| is the cryptographic key, and iv to use. + * |decrypt| is a callback function which actually decrypts a packet. + * + * This function returns the number of bytes written in |dest| if it + * succeeds, or one of the following negative error codes: + * + * NGTCP2_ERR_CALLBACK_FAILURE + * User callback failed. + * NGTCP2_ERR_DECRYPT + * Failed to decrypt a packet. + */ +static ngtcp2_ssize decrypt_pkt(uint8_t *dest, const ngtcp2_crypto_aead *aead, + const uint8_t *payload, size_t payloadlen, + const uint8_t *aad, size_t aadlen, + int64_t pkt_num, ngtcp2_crypto_km *ckm, + ngtcp2_decrypt decrypt) { + /* TODO nonce is limited to 64 bytes. */ + uint8_t nonce[64]; + int rv; + + assert(sizeof(nonce) >= ckm->iv.len); + + ngtcp2_crypto_create_nonce(nonce, ckm->iv.base, ckm->iv.len, pkt_num); + + rv = decrypt(dest, aead, &ckm->aead_ctx, payload, payloadlen, nonce, + ckm->iv.len, aad, aadlen); + + if (rv != 0) { + if (rv == NGTCP2_ERR_DECRYPT) { + return rv; + } + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + assert(payloadlen >= aead->max_overhead); + + return (ngtcp2_ssize)(payloadlen - aead->max_overhead); +} + +/* + * decrypt_hp decryptes packet header. The packet number starts at + * |pkt| + |pkt_num_offset|. The entire plaintext QUIC packet header + * will be written to the buffer pointed by |dest| whose capacity is + * |destlen|. + * + * This function returns the number of bytes written to |dest|, or one + * of the following negative error codes: + * + * NGTCP2_ERR_PROTO + * Packet is badly formatted + * NGTCP2_ERR_CALLBACK_FAILURE + * User-defined callback function failed; or it does not return + * expected result. + */ +static ngtcp2_ssize +decrypt_hp(ngtcp2_pkt_hd *hd, uint8_t *dest, const ngtcp2_crypto_cipher *hp, + const uint8_t *pkt, size_t pktlen, size_t pkt_num_offset, + const ngtcp2_crypto_cipher_ctx *hp_ctx, ngtcp2_hp_mask hp_mask) { + size_t sample_offset; + uint8_t *p = dest; + uint8_t mask[NGTCP2_HP_SAMPLELEN]; + size_t i; + int rv; + + assert(hp_mask); + + if (pkt_num_offset + 4 + NGTCP2_HP_SAMPLELEN > pktlen) { + return NGTCP2_ERR_PROTO; + } + + p = ngtcp2_cpymem(p, pkt, pkt_num_offset); + + sample_offset = pkt_num_offset + 4; + + rv = hp_mask(mask, hp, hp_ctx, pkt + sample_offset); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + if (hd->flags & NGTCP2_PKT_FLAG_LONG_FORM) { + dest[0] = (uint8_t)(dest[0] ^ (mask[0] & 0x0f)); + } else { + dest[0] = (uint8_t)(dest[0] ^ (mask[0] & 0x1f)); + if (dest[0] & NGTCP2_SHORT_KEY_PHASE_BIT) { + hd->flags |= NGTCP2_PKT_FLAG_KEY_PHASE; + } + } + + hd->pkt_numlen = (size_t)((dest[0] & NGTCP2_PKT_NUMLEN_MASK) + 1); + + for (i = 0; i < hd->pkt_numlen; ++i) { + *p++ = *(pkt + pkt_num_offset + i) ^ mask[i + 1]; + } + + hd->pkt_num = ngtcp2_get_pkt_num(p - hd->pkt_numlen, hd->pkt_numlen); + + return p - dest; +} + +/* + * conn_emit_pending_crypto_data delivers pending stream data to the + * application due to packet reordering. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_CALLBACK_FAILURE + * User callback failed + * NGTCP2_ERR_CRYPTO + * TLS backend reported error + */ +static int conn_emit_pending_crypto_data(ngtcp2_conn *conn, + ngtcp2_crypto_level crypto_level, + ngtcp2_strm *strm, + uint64_t rx_offset) { + size_t datalen; + const uint8_t *data; + int rv; + uint64_t offset; + + if (!strm->rx.rob) { + return 0; + } + + for (;;) { + datalen = ngtcp2_rob_data_at(strm->rx.rob, &data, rx_offset); + if (datalen == 0) { + assert(rx_offset == ngtcp2_strm_rx_offset(strm)); + return 0; + } + + offset = rx_offset; + rx_offset += datalen; + + rv = conn_call_recv_crypto_data(conn, crypto_level, offset, data, datalen); + if (rv != 0) { + return rv; + } + + ngtcp2_rob_pop(strm->rx.rob, rx_offset - datalen, datalen); + } +} + +/* + * conn_recv_connection_close is called when CONNECTION_CLOSE or + * APPLICATION_CLOSE frame is received. + */ +static int conn_recv_connection_close(ngtcp2_conn *conn, + ngtcp2_connection_close *fr) { + ngtcp2_connection_close_error *ccerr = &conn->rx.ccerr; + + conn->state = NGTCP2_CS_DRAINING; + if (fr->type == NGTCP2_FRAME_CONNECTION_CLOSE) { + ccerr->type = NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT; + } else { + ccerr->type = NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_APPLICATION; + } + ccerr->error_code = fr->error_code; + ccerr->frame_type = fr->frame_type; + + if (!fr->reasonlen) { + ccerr->reasonlen = 0; + + return 0; + } + + if (ccerr->reason == NULL) { + ccerr->reason = ngtcp2_mem_malloc( + conn->mem, NGTCP2_CONNECTION_CLOSE_ERROR_MAX_REASONLEN); + if (ccerr->reason == NULL) { + return NGTCP2_ERR_NOMEM; + } + } + + ccerr->reasonlen = + ngtcp2_min(fr->reasonlen, NGTCP2_CONNECTION_CLOSE_ERROR_MAX_REASONLEN); + ngtcp2_cpymem((uint8_t *)ccerr->reason, fr->reason, ccerr->reasonlen); + + return 0; +} + +static void conn_recv_path_challenge(ngtcp2_conn *conn, const ngtcp2_path *path, + ngtcp2_path_challenge *fr) { + ngtcp2_path_challenge_entry *ent; + + /* client only responds to PATH_CHALLENGE from the current path or + path which client is migrating to. */ + if (!conn->server && !ngtcp2_path_eq(&conn->dcid.current.ps.path, path) && + (!conn->pv || !ngtcp2_path_eq(&conn->pv->dcid.ps.path, path))) { + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, + "discard PATH_CHALLENGE from the path which is not current " + "or endpoint is migrating to"); + return; + } + + ent = ngtcp2_ringbuf_push_front(&conn->rx.path_challenge.rb); + ngtcp2_path_challenge_entry_init(ent, path, fr->data); +} + +/* + * conn_reset_congestion_state resets congestion state. + */ +static void conn_reset_congestion_state(ngtcp2_conn *conn, ngtcp2_tstamp ts) { + conn_reset_conn_stat_cc(conn, &conn->cstat); + + conn->cc.reset(&conn->cc, &conn->cstat, ts); + + if (conn->hs_pktns) { + ngtcp2_rtb_reset_cc_state(&conn->hs_pktns->rtb, + conn->hs_pktns->tx.last_pkt_num + 1); + } + ngtcp2_rtb_reset_cc_state(&conn->pktns.rtb, conn->pktns.tx.last_pkt_num + 1); + ngtcp2_rst_init(&conn->rst); + + conn->tx.pacing.next_ts = UINT64_MAX; +} + +static int conn_recv_path_response(ngtcp2_conn *conn, ngtcp2_path_response *fr, + ngtcp2_tstamp ts) { + int rv; + ngtcp2_duration pto, timeout; + ngtcp2_pv *pv = conn->pv, *npv; + uint8_t ent_flags; + + if (!pv) { + return 0; + } + + rv = ngtcp2_pv_validate(pv, &ent_flags, fr->data); + if (rv != 0) { + assert(!ngtcp2_err_is_fatal(rv)); + + return 0; + } + + if (!(pv->flags & NGTCP2_PV_FLAG_DONT_CARE)) { + if (!(pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE)) { + if (pv->dcid.seq != conn->dcid.current.seq) { + assert(conn->dcid.current.cid.datalen); + + rv = conn_retire_dcid(conn, &conn->dcid.current, ts); + if (rv != 0) { + return rv; + } + ngtcp2_dcid_copy(&conn->dcid.current, &pv->dcid); + } + conn_reset_congestion_state(conn, ts); + conn_reset_ecn_validation_state(conn); + } + + if (ngtcp2_path_eq(&pv->dcid.ps.path, &conn->dcid.current.ps.path)) { + conn->dcid.current.flags |= NGTCP2_DCID_FLAG_PATH_VALIDATED; + + if (!conn->local.settings.no_pmtud) { + ngtcp2_conn_stop_pmtud(conn); + + if (!(pv->flags & NGTCP2_PV_ENTRY_FLAG_UNDERSIZED)) { + rv = conn_start_pmtud(conn); + if (rv != 0) { + return rv; + } + } + } + } + + rv = conn_call_path_validation(conn, pv, + NGTCP2_PATH_VALIDATION_RESULT_SUCCESS); + if (rv != 0) { + return rv; + } + } + + if (pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE) { + pto = conn_compute_pto(conn, &conn->pktns); + timeout = 3 * ngtcp2_max(pto, pv->fallback_pto); + + if (ent_flags & NGTCP2_PV_ENTRY_FLAG_UNDERSIZED) { + assert(conn->server); + + /* Validate path again */ + rv = ngtcp2_pv_new(&npv, &pv->dcid, timeout, + NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE | + NGTCP2_PV_FLAG_MTU_PROBE, + &conn->log, conn->mem); + if (rv != 0) { + return rv; + } + + npv->dcid.flags |= NGTCP2_DCID_FLAG_PATH_VALIDATED; + ngtcp2_dcid_copy(&npv->fallback_dcid, &pv->fallback_dcid); + npv->fallback_pto = pv->fallback_pto; + } else { + rv = ngtcp2_pv_new(&npv, &pv->fallback_dcid, timeout, + NGTCP2_PV_FLAG_DONT_CARE, &conn->log, conn->mem); + if (rv != 0) { + return rv; + } + } + + /* Unset the flag bit so that conn_stop_pv does not retire + DCID. */ + pv->flags &= (uint8_t)~NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE; + + rv = conn_stop_pv(conn, ts); + if (rv != 0) { + ngtcp2_pv_del(npv); + return rv; + } + + conn->pv = npv; + + return 0; + } + + return conn_stop_pv(conn, ts); +} + +/* + * pkt_num_bits returns the number of bits available when packet + * number is encoded in |pkt_numlen| bytes. + */ +static size_t pkt_num_bits(size_t pkt_numlen) { + switch (pkt_numlen) { + case 1: + return 8; + case 2: + return 16; + case 3: + return 24; + case 4: + return 32; + default: + ngtcp2_unreachable(); + } +} + +/* + * pktns_pkt_num_is_duplicate returns nonzero if |pkt_num| is + * duplicated packet number. + */ +static int pktns_pkt_num_is_duplicate(ngtcp2_pktns *pktns, int64_t pkt_num) { + return ngtcp2_gaptr_is_pushed(&pktns->rx.pngap, (uint64_t)pkt_num, 1); +} + +/* + * pktns_commit_recv_pkt_num marks packet number |pkt_num| as + * received. + */ +static int pktns_commit_recv_pkt_num(ngtcp2_pktns *pktns, int64_t pkt_num, + int ack_eliciting, ngtcp2_tstamp ts) { + int rv; + + if (ack_eliciting && pktns->rx.max_ack_eliciting_pkt_num + 1 != pkt_num) { + ngtcp2_acktr_immediate_ack(&pktns->acktr); + } + if (pktns->rx.max_pkt_num < pkt_num) { + pktns->rx.max_pkt_num = pkt_num; + pktns->rx.max_pkt_ts = ts; + } + if (ack_eliciting && pktns->rx.max_ack_eliciting_pkt_num < pkt_num) { + pktns->rx.max_ack_eliciting_pkt_num = pkt_num; + } + + rv = ngtcp2_gaptr_push(&pktns->rx.pngap, (uint64_t)pkt_num, 1); + if (rv != 0) { + return rv; + } + + if (ngtcp2_ksl_len(&pktns->rx.pngap.gap) > 256) { + ngtcp2_gaptr_drop_first_gap(&pktns->rx.pngap); + } + + return 0; +} + +/* + * verify_token verifies |hd| contains |token| in its token field. It + * returns 0 if it succeeds, or NGTCP2_ERR_PROTO. + */ +static int verify_token(const uint8_t *token, size_t tokenlen, + const ngtcp2_pkt_hd *hd) { + if (tokenlen == hd->tokenlen && ngtcp2_cmemeq(token, hd->token, tokenlen)) { + return 0; + } + return NGTCP2_ERR_PROTO; +} + +static void pktns_increase_ecn_counts(ngtcp2_pktns *pktns, + const ngtcp2_pkt_info *pi) { + switch (pi->ecn & NGTCP2_ECN_MASK) { + case NGTCP2_ECN_ECT_0: + ++pktns->rx.ecn.ect0; + break; + case NGTCP2_ECN_ECT_1: + ++pktns->rx.ecn.ect1; + break; + case NGTCP2_ECN_CE: + ++pktns->rx.ecn.ce; + break; + } +} + +/* + * vneg_available_versions_includes returns nonzero if + * |available_versions| of length |available_versionslen| includes + * |version|. |available_versions| is the wire image of + * available_versions field of version_information transport + * parameter, and each version is encoded in network byte order. + */ +static int vneg_available_versions_includes(const uint8_t *available_versions, + size_t available_versionslen, + uint32_t version) { + size_t i; + uint32_t v; + + assert(!(available_versionslen & 0x3)); + + if (available_versionslen == 0) { + return 0; + } + + for (i = 0; i < available_versionslen; i += sizeof(uint32_t)) { + available_versions = ngtcp2_get_uint32(&v, available_versions); + + if (version == v) { + return 1; + } + } + + return 0; +} + +static int conn_recv_crypto(ngtcp2_conn *conn, ngtcp2_crypto_level crypto_level, + ngtcp2_strm *strm, const ngtcp2_crypto *fr); + +static ngtcp2_ssize conn_recv_pkt(ngtcp2_conn *conn, const ngtcp2_path *path, + const ngtcp2_pkt_info *pi, const uint8_t *pkt, + size_t pktlen, size_t dgramlen, + ngtcp2_tstamp pkt_ts, ngtcp2_tstamp ts); + +static int conn_process_buffered_protected_pkt(ngtcp2_conn *conn, + ngtcp2_pktns *pktns, + ngtcp2_tstamp ts); + +/* + * conn_recv_handshake_pkt processes received packet |pkt| whose + * length is |pktlen| during handshake period. The buffer pointed by + * |pkt| might contain multiple packets. This function only processes + * one packet. |pkt_ts| is the timestamp when packet is received. + * |ts| should be the current time. Usually they are the same, but + * for buffered packets, |pkt_ts| would be earlier than |ts|. + * + * This function returns the number of bytes it reads if it succeeds, + * or one of the following negative error codes: + * + * NGTCP2_ERR_RECV_VERSION_NEGOTIATION + * Version Negotiation packet is received. + * NGTCP2_ERR_NOMEM + * Out of memory. + * NGTCP2_ERR_CALLBACK_FAILURE + * User-defined callback function failed. + * NGTCP2_ERR_DISCARD_PKT + * Packet was discarded because plain text header was malformed; + * or its payload could not be decrypted. + * NGTCP2_ERR_FRAME_FORMAT + * Frame is badly formatted + * NGTCP2_ERR_ACK_FRAME + * ACK frame is malformed. + * NGTCP2_ERR_CRYPTO + * TLS stack reported error. + * NGTCP2_ERR_PROTO + * Generic QUIC protocol error. + * + * In addition to the above error codes, error codes returned from + * conn_recv_pkt are also returned. + */ +static ngtcp2_ssize +conn_recv_handshake_pkt(ngtcp2_conn *conn, const ngtcp2_path *path, + const ngtcp2_pkt_info *pi, const uint8_t *pkt, + size_t pktlen, size_t dgramlen, ngtcp2_tstamp pkt_ts, + ngtcp2_tstamp ts) { + ngtcp2_ssize nread; + ngtcp2_pkt_hd hd; + ngtcp2_max_frame mfr; + ngtcp2_frame *fr = &mfr.fr; + int rv; + int require_ack = 0; + size_t hdpktlen; + const uint8_t *payload; + size_t payloadlen; + ngtcp2_ssize nwrite; + ngtcp2_crypto_aead *aead; + ngtcp2_crypto_cipher *hp; + ngtcp2_crypto_km *ckm; + ngtcp2_crypto_cipher_ctx *hp_ctx; + ngtcp2_hp_mask hp_mask; + ngtcp2_decrypt decrypt; + ngtcp2_pktns *pktns; + ngtcp2_strm *crypto; + ngtcp2_crypto_level crypto_level; + int invalid_reserved_bits = 0; + + if (pktlen == 0) { + return 0; + } + + if (!(pkt[0] & NGTCP2_HEADER_FORM_BIT)) { + if (conn->state == NGTCP2_CS_SERVER_INITIAL) { + /* Ignore 1RTT packet unless server's first Handshake packet has + been transmitted. */ + return (ngtcp2_ssize)pktlen; + } + + if (conn->pktns.crypto.rx.ckm) { + return 0; + } + + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, + "buffering 1RTT packet len=%zu", pktlen); + + rv = conn_buffer_pkt(conn, &conn->pktns, path, pi, pkt, pktlen, dgramlen, + ts); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + return rv; + } + return (ngtcp2_ssize)pktlen; + } + + nread = ngtcp2_pkt_decode_hd_long(&hd, pkt, pktlen); + if (nread < 0) { + return NGTCP2_ERR_DISCARD_PKT; + } + + if (hd.type == NGTCP2_PKT_VERSION_NEGOTIATION) { + hdpktlen = (size_t)nread; + + ngtcp2_log_rx_pkt_hd(&conn->log, &hd); + + if (conn->server) { + return NGTCP2_ERR_DISCARD_PKT; + } + + /* Receiving Version Negotiation packet after getting Handshake + packet from server is invalid. */ + if (conn->flags & NGTCP2_CONN_FLAG_CONN_ID_NEGOTIATED) { + return NGTCP2_ERR_DISCARD_PKT; + } + + if (!ngtcp2_cid_eq(&conn->oscid, &hd.dcid)) { + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "packet was ignored because of mismatched DCID"); + return NGTCP2_ERR_DISCARD_PKT; + } + + if (!ngtcp2_cid_eq(&conn->dcid.current.cid, &hd.scid)) { + /* Just discard invalid Version Negotiation packet */ + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "packet was ignored because of mismatched SCID"); + return NGTCP2_ERR_DISCARD_PKT; + } + rv = conn_on_version_negotiation(conn, &hd, pkt + hdpktlen, + pktlen - hdpktlen); + if (rv != 0) { + if (ngtcp2_err_is_fatal(rv)) { + return rv; + } + return NGTCP2_ERR_DISCARD_PKT; + } + return NGTCP2_ERR_RECV_VERSION_NEGOTIATION; + } else if (hd.type == NGTCP2_PKT_RETRY) { + hdpktlen = (size_t)nread; + + ngtcp2_log_rx_pkt_hd(&conn->log, &hd); + + if (conn->server) { + return NGTCP2_ERR_DISCARD_PKT; + } + + /* Receiving Retry packet after getting Initial packet from server + is invalid. */ + if (conn->flags & NGTCP2_CONN_FLAG_CONN_ID_NEGOTIATED) { + return NGTCP2_ERR_DISCARD_PKT; + } + + if (conn->client_chosen_version != hd.version) { + return NGTCP2_ERR_DISCARD_PKT; + } + + rv = conn_on_retry(conn, &hd, hdpktlen, pkt, pktlen, ts); + if (rv != 0) { + if (ngtcp2_err_is_fatal(rv)) { + return rv; + } + return NGTCP2_ERR_DISCARD_PKT; + } + return (ngtcp2_ssize)pktlen; + } + + if (pktlen < (size_t)nread + hd.len) { + return NGTCP2_ERR_DISCARD_PKT; + } + + pktlen = (size_t)nread + hd.len; + + if (!ngtcp2_is_supported_version(hd.version)) { + return NGTCP2_ERR_DISCARD_PKT; + } + + if (conn->server) { + if (hd.version != conn->client_chosen_version && + (!conn->negotiated_version || hd.version != conn->negotiated_version)) { + return NGTCP2_ERR_DISCARD_PKT; + } + } else if (hd.version != conn->client_chosen_version && + conn->negotiated_version && + hd.version != conn->negotiated_version) { + return NGTCP2_ERR_DISCARD_PKT; + } + + /* Quoted from spec: if subsequent packets of those types include a + different Source Connection ID, they MUST be discarded. */ + if ((conn->flags & NGTCP2_CONN_FLAG_CONN_ID_NEGOTIATED) && + !ngtcp2_cid_eq(&conn->dcid.current.cid, &hd.scid)) { + ngtcp2_log_rx_pkt_hd(&conn->log, &hd); + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "packet was ignored because of mismatched SCID"); + return NGTCP2_ERR_DISCARD_PKT; + } + + switch (hd.type) { + case NGTCP2_PKT_0RTT: + if (!conn->server) { + return NGTCP2_ERR_DISCARD_PKT; + } + + if (hd.version != conn->client_chosen_version) { + return NGTCP2_ERR_DISCARD_PKT; + } + + if (conn->flags & NGTCP2_CONN_FLAG_CONN_ID_NEGOTIATED) { + if (conn->early.ckm) { + ngtcp2_ssize nread2; + /* TODO Avoid to parse header twice. */ + nread2 = + conn_recv_pkt(conn, path, pi, pkt, pktlen, dgramlen, pkt_ts, ts); + if (nread2 < 0) { + return nread2; + } + } + + /* Discard 0-RTT packet if we don't have a key to decrypt it. */ + return (ngtcp2_ssize)pktlen; + } + + /* Buffer re-ordered 0-RTT packet. */ + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, + "buffering 0-RTT packet len=%zu", pktlen); + + rv = conn_buffer_pkt(conn, conn->in_pktns, path, pi, pkt, pktlen, dgramlen, + ts); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + return rv; + } + + return (ngtcp2_ssize)pktlen; + case NGTCP2_PKT_INITIAL: + if (!conn->in_pktns) { + ngtcp2_log_info( + &conn->log, NGTCP2_LOG_EVENT_PKT, + "Initial packet is discarded because keys have been discarded"); + return (ngtcp2_ssize)pktlen; + } + + assert(conn->in_pktns); + + if (conn->server) { + if (dgramlen < NGTCP2_MAX_UDP_PAYLOAD_SIZE) { + ngtcp2_log_info( + &conn->log, NGTCP2_LOG_EVENT_PKT, + "Initial packet was ignored because it is included in UDP datagram " + "less than %zu bytes: %zu bytes", + NGTCP2_MAX_UDP_PAYLOAD_SIZE, dgramlen); + return NGTCP2_ERR_DISCARD_PKT; + } + if (conn->local.settings.tokenlen) { + rv = verify_token(conn->local.settings.token, + conn->local.settings.tokenlen, &hd); + if (rv != 0) { + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "packet was ignored because token is invalid"); + return NGTCP2_ERR_DISCARD_PKT; + } + } + if ((conn->flags & NGTCP2_CONN_FLAG_CONN_ID_NEGOTIATED) == 0) { + /* Set rcid here so that it is available to callback. If this + packet is discarded later in this function and no packet is + processed in this connection attempt so far, connection + will be dropped. */ + conn->rcid = hd.dcid; + + rv = conn_call_recv_client_initial(conn, &hd.dcid); + if (rv != 0) { + return rv; + } + } + } else { + if (hd.tokenlen != 0) { + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "packet was ignored because token is not empty"); + return NGTCP2_ERR_DISCARD_PKT; + } + + if (hd.version != conn->client_chosen_version && + !conn->negotiated_version && conn->vneg.version != hd.version) { + if (!vneg_available_versions_includes(conn->vneg.available_versions, + conn->vneg.available_versionslen, + hd.version)) { + return NGTCP2_ERR_DISCARD_PKT; + } + + /* Install new Initial keys using QUIC version = hd.version */ + rv = conn_call_version_negotiation( + conn, hd.version, + (conn->flags & NGTCP2_CONN_FLAG_RECV_RETRY) + ? &conn->dcid.current.cid + : &conn->rcid); + if (rv != 0) { + return rv; + } + + assert(conn->vneg.version == hd.version); + } + } + + pktns = conn->in_pktns; + crypto = &pktns->crypto.strm; + crypto_level = NGTCP2_CRYPTO_LEVEL_INITIAL; + + if (hd.version == conn->client_chosen_version) { + ckm = pktns->crypto.rx.ckm; + hp_ctx = &pktns->crypto.rx.hp_ctx; + } else { + assert(conn->vneg.version == hd.version); + + ckm = conn->vneg.rx.ckm; + hp_ctx = &conn->vneg.rx.hp_ctx; + } + + break; + case NGTCP2_PKT_HANDSHAKE: + if (hd.version != conn->negotiated_version) { + return NGTCP2_ERR_DISCARD_PKT; + } + + if (!conn->hs_pktns->crypto.rx.ckm) { + if (conn->server) { + ngtcp2_log_info( + &conn->log, NGTCP2_LOG_EVENT_PKT, + "Handshake packet at this point is unexpected and discarded"); + return (ngtcp2_ssize)pktlen; + } + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, + "buffering Handshake packet len=%zu", pktlen); + + rv = conn_buffer_pkt(conn, conn->hs_pktns, path, pi, pkt, pktlen, + dgramlen, ts); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + return rv; + } + return (ngtcp2_ssize)pktlen; + } + + pktns = conn->hs_pktns; + crypto = &pktns->crypto.strm; + crypto_level = NGTCP2_CRYPTO_LEVEL_HANDSHAKE; + ckm = pktns->crypto.rx.ckm; + hp_ctx = &pktns->crypto.rx.hp_ctx; + + break; + default: + /* unknown packet type */ + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "packet was ignored because of unknown packet type"); + return (ngtcp2_ssize)pktlen; + } + + hp_mask = conn->callbacks.hp_mask; + decrypt = conn->callbacks.decrypt; + aead = &pktns->crypto.ctx.aead; + hp = &pktns->crypto.ctx.hp; + + assert(ckm); + assert(hp_mask); + assert(decrypt); + + rv = conn_ensure_decrypt_hp_buffer(conn, (size_t)nread + 4); + if (rv != 0) { + return rv; + } + + nwrite = decrypt_hp(&hd, conn->crypto.decrypt_hp_buf.base, hp, pkt, pktlen, + (size_t)nread, hp_ctx, hp_mask); + if (nwrite < 0) { + if (ngtcp2_err_is_fatal((int)nwrite)) { + return nwrite; + } + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "could not decrypt packet number"); + return NGTCP2_ERR_DISCARD_PKT; + } + + hdpktlen = (size_t)nwrite; + payload = pkt + hdpktlen; + payloadlen = hd.len - hd.pkt_numlen; + + hd.pkt_num = ngtcp2_pkt_adjust_pkt_num(pktns->rx.max_pkt_num, hd.pkt_num, + pkt_num_bits(hd.pkt_numlen)); + if (hd.pkt_num > NGTCP2_MAX_PKT_NUM) { + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "pkn=%" PRId64 " is greater than maximum pkn", hd.pkt_num); + return NGTCP2_ERR_DISCARD_PKT; + } + + ngtcp2_log_rx_pkt_hd(&conn->log, &hd); + + rv = ngtcp2_pkt_verify_reserved_bits(conn->crypto.decrypt_hp_buf.base[0]); + if (rv != 0) { + invalid_reserved_bits = 1; + + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "packet has incorrect reserved bits"); + + /* Will return error after decrypting payload */ + } + + if (pktns_pkt_num_is_duplicate(pktns, hd.pkt_num)) { + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "packet was discarded because of duplicated packet number"); + return NGTCP2_ERR_DISCARD_PKT; + } + + rv = conn_ensure_decrypt_buffer(conn, payloadlen); + if (rv != 0) { + return rv; + } + + nwrite = decrypt_pkt(conn->crypto.decrypt_buf.base, aead, payload, payloadlen, + conn->crypto.decrypt_hp_buf.base, hdpktlen, hd.pkt_num, + ckm, decrypt); + if (nwrite < 0) { + if (ngtcp2_err_is_fatal((int)nwrite)) { + return nwrite; + } + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "could not decrypt packet payload"); + return NGTCP2_ERR_DISCARD_PKT; + } + + if (invalid_reserved_bits) { + return NGTCP2_ERR_PROTO; + } + + if (!conn->server && hd.version != conn->client_chosen_version && + !conn->negotiated_version) { + conn->negotiated_version = hd.version; + + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, + "the negotiated version is 0x%08x", + conn->negotiated_version); + } + + payload = conn->crypto.decrypt_buf.base; + payloadlen = (size_t)nwrite; + + switch (hd.type) { + case NGTCP2_PKT_INITIAL: + if (!conn->server || ((conn->flags & NGTCP2_CONN_FLAG_CONN_ID_NEGOTIATED) && + !ngtcp2_cid_eq(&conn->rcid, &hd.dcid))) { + rv = conn_verify_dcid(conn, NULL, &hd); + if (rv != 0) { + if (ngtcp2_err_is_fatal(rv)) { + return rv; + } + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "packet was ignored because of mismatched DCID"); + return NGTCP2_ERR_DISCARD_PKT; + } + } + break; + case NGTCP2_PKT_HANDSHAKE: + rv = conn_verify_dcid(conn, NULL, &hd); + if (rv != 0) { + if (ngtcp2_err_is_fatal(rv)) { + return rv; + } + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "packet was ignored because of mismatched DCID"); + return NGTCP2_ERR_DISCARD_PKT; + } + break; + default: + ngtcp2_unreachable(); + } + + if (payloadlen == 0) { + /* QUIC packet must contain at least one frame */ + if (hd.type == NGTCP2_PKT_INITIAL) { + return NGTCP2_ERR_DISCARD_PKT; + } + return NGTCP2_ERR_PROTO; + } + + if (hd.type == NGTCP2_PKT_INITIAL && + !(conn->flags & NGTCP2_CONN_FLAG_CONN_ID_NEGOTIATED)) { + conn->flags |= NGTCP2_CONN_FLAG_CONN_ID_NEGOTIATED; + if (!conn->server) { + conn->dcid.current.cid = hd.scid; + } + } + + ngtcp2_qlog_pkt_received_start(&conn->qlog); + + for (; payloadlen;) { + nread = ngtcp2_pkt_decode_frame(fr, payload, payloadlen); + if (nread < 0) { + return nread; + } + + payload += nread; + payloadlen -= (size_t)nread; + + switch (fr->type) { + case NGTCP2_FRAME_ACK: + case NGTCP2_FRAME_ACK_ECN: + fr->ack.ack_delay = 0; + fr->ack.ack_delay_unscaled = 0; + break; + } + + ngtcp2_log_rx_fr(&conn->log, &hd, fr); + + switch (fr->type) { + case NGTCP2_FRAME_ACK: + case NGTCP2_FRAME_ACK_ECN: + if (!conn->server && hd.type == NGTCP2_PKT_HANDSHAKE) { + conn->flags |= NGTCP2_CONN_FLAG_SERVER_ADDR_VERIFIED; + } + rv = conn_recv_ack(conn, pktns, &fr->ack, pkt_ts, ts); + if (rv != 0) { + return rv; + } + break; + case NGTCP2_FRAME_PADDING: + break; + case NGTCP2_FRAME_CRYPTO: + if (!conn->server && !conn->negotiated_version && + ngtcp2_vec_len(fr->crypto.data, fr->crypto.datacnt)) { + conn->negotiated_version = hd.version; + + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, + "the negotiated version is 0x%08x", + conn->negotiated_version); + } + + rv = conn_recv_crypto(conn, crypto_level, crypto, &fr->crypto); + if (rv != 0) { + return rv; + } + require_ack = 1; + break; + case NGTCP2_FRAME_CONNECTION_CLOSE: + rv = conn_recv_connection_close(conn, &fr->connection_close); + if (rv != 0) { + return rv; + } + break; + case NGTCP2_FRAME_PING: + require_ack = 1; + break; + default: + return NGTCP2_ERR_PROTO; + } + + ngtcp2_qlog_write_frame(&conn->qlog, fr); + } + + if (hd.type == NGTCP2_PKT_HANDSHAKE) { + /* Successful processing of Handshake packet from a remote + endpoint validates its source address. */ + conn->dcid.current.flags |= NGTCP2_DCID_FLAG_PATH_VALIDATED; + } + + ngtcp2_qlog_pkt_received_end(&conn->qlog, &hd, pktlen); + + rv = pktns_commit_recv_pkt_num(pktns, hd.pkt_num, require_ack, pkt_ts); + if (rv != 0) { + return rv; + } + + pktns_increase_ecn_counts(pktns, pi); + + /* TODO Initial and Handshake are always acknowledged without + delay. */ + if (require_ack && + (++pktns->acktr.rx_npkt >= conn->local.settings.ack_thresh || + (pi->ecn & NGTCP2_ECN_MASK) == NGTCP2_ECN_CE)) { + ngtcp2_acktr_immediate_ack(&pktns->acktr); + } + + rv = ngtcp2_conn_sched_ack(conn, &pktns->acktr, hd.pkt_num, require_ack, + pkt_ts); + if (rv != 0) { + return rv; + } + + conn_restart_timer_on_read(conn, ts); + + ngtcp2_qlog_metrics_updated(&conn->qlog, &conn->cstat); + + return conn->state == NGTCP2_CS_DRAINING ? NGTCP2_ERR_DRAINING + : (ngtcp2_ssize)pktlen; +} + +static int is_unrecoverable_error(int liberr) { + switch (liberr) { + case NGTCP2_ERR_CRYPTO: + case NGTCP2_ERR_REQUIRED_TRANSPORT_PARAM: + case NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM: + case NGTCP2_ERR_TRANSPORT_PARAM: + case NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE: + return 1; + } + + return 0; +} + +/* + * conn_recv_handshake_cpkt processes compound packet during + * handshake. The buffer pointed by |pkt| might contain multiple + * packets. The 1RTT packet must be the last one because it does not + * have payload length field. + * + * This function returns the same error code returned by + * conn_recv_handshake_pkt. + */ +static ngtcp2_ssize conn_recv_handshake_cpkt(ngtcp2_conn *conn, + const ngtcp2_path *path, + const ngtcp2_pkt_info *pi, + const uint8_t *pkt, size_t pktlen, + ngtcp2_tstamp ts) { + ngtcp2_ssize nread; + size_t dgramlen = pktlen; + const uint8_t *origpkt = pkt; + uint32_t version; + + if (ngtcp2_path_eq(&conn->dcid.current.ps.path, path)) { + conn->dcid.current.bytes_recv += dgramlen; + } + + while (pktlen) { + nread = + conn_recv_handshake_pkt(conn, path, pi, pkt, pktlen, dgramlen, ts, ts); + if (nread < 0) { + if (ngtcp2_err_is_fatal((int)nread)) { + return nread; + } + + if (nread == NGTCP2_ERR_DRAINING) { + return NGTCP2_ERR_DRAINING; + } + + if ((pkt[0] & NGTCP2_HEADER_FORM_BIT) && pktlen > 4) { + /* Not a Version Negotiation packet */ + ngtcp2_get_uint32(&version, &pkt[1]); + if (ngtcp2_pkt_get_type_long(version, pkt[0]) == NGTCP2_PKT_INITIAL) { + if (conn->server) { + if (is_unrecoverable_error((int)nread)) { + /* If server gets crypto error from TLS stack, it is + unrecoverable, therefore drop connection. */ + return nread; + } + + /* If server discards first Initial, then drop connection + state. This is because SCID in packet might be corrupted + and the current connection state might wrongly discard + valid packet and prevent the handshake from + completing. */ + if (conn->in_pktns && conn->in_pktns->rx.max_pkt_num == -1) { + return NGTCP2_ERR_DROP_CONN; + } + + return (ngtcp2_ssize)dgramlen; + } + /* client */ + if (is_unrecoverable_error((int)nread)) { + /* If client gets crypto error from TLS stack, it is + unrecoverable, therefore drop connection. */ + return nread; + } + return (ngtcp2_ssize)dgramlen; + } + } + + if (nread == NGTCP2_ERR_DISCARD_PKT) { + return (ngtcp2_ssize)dgramlen; + } + + return nread; + } + + if (nread == 0) { + assert(!(pkt[0] & NGTCP2_HEADER_FORM_BIT)); + return pkt - origpkt; + } + + assert(pktlen >= (size_t)nread); + pkt += nread; + pktlen -= (size_t)nread; + + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "read packet %td left %zu", nread, pktlen); + } + + return (ngtcp2_ssize)dgramlen; +} + +int ngtcp2_conn_init_stream(ngtcp2_conn *conn, ngtcp2_strm *strm, + int64_t stream_id, void *stream_user_data) { + int rv; + uint64_t max_rx_offset; + uint64_t max_tx_offset; + int local_stream = conn_local_stream(conn, stream_id); + + assert(conn->remote.transport_params); + + if (bidi_stream(stream_id)) { + if (local_stream) { + max_rx_offset = + conn->local.transport_params.initial_max_stream_data_bidi_local; + max_tx_offset = + conn->remote.transport_params->initial_max_stream_data_bidi_remote; + } else { + max_rx_offset = + conn->local.transport_params.initial_max_stream_data_bidi_remote; + max_tx_offset = + conn->remote.transport_params->initial_max_stream_data_bidi_local; + } + } else if (local_stream) { + max_rx_offset = 0; + max_tx_offset = conn->remote.transport_params->initial_max_stream_data_uni; + } else { + max_rx_offset = conn->local.transport_params.initial_max_stream_data_uni; + max_tx_offset = 0; + } + + ngtcp2_strm_init(strm, stream_id, NGTCP2_STRM_FLAG_NONE, max_rx_offset, + max_tx_offset, stream_user_data, &conn->frc_objalloc, + conn->mem); + + rv = ngtcp2_map_insert(&conn->strms, (ngtcp2_map_key_type)strm->stream_id, + strm); + if (rv != 0) { + assert(rv != NGTCP2_ERR_INVALID_ARGUMENT); + goto fail; + } + + return 0; + +fail: + ngtcp2_strm_free(strm); + return rv; +} + +/* + * conn_emit_pending_stream_data passes buffered ordered stream data + * to the application. |rx_offset| is the first offset to deliver to + * the application. This function assumes that the data up to + * |rx_offset| has been delivered already. This function only passes + * the ordered data without any gap. If there is a gap, it stops + * providing the data to the application, and returns. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_CALLBACK_FAILURE + * User callback failed. + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +static int conn_emit_pending_stream_data(ngtcp2_conn *conn, ngtcp2_strm *strm, + uint64_t rx_offset) { + size_t datalen; + const uint8_t *data; + int rv; + uint64_t offset; + uint32_t sdflags; + int handshake_completed = conn_is_handshake_completed(conn); + + if (!strm->rx.rob) { + return 0; + } + + for (;;) { + /* Stop calling callback if application has called + ngtcp2_conn_shutdown_stream_read() inside the callback. + Because it doubly counts connection window. */ + if (strm->flags & NGTCP2_STRM_FLAG_STOP_SENDING) { + return 0; + } + + datalen = ngtcp2_rob_data_at(strm->rx.rob, &data, rx_offset); + if (datalen == 0) { + assert(rx_offset == ngtcp2_strm_rx_offset(strm)); + return 0; + } + + offset = rx_offset; + rx_offset += datalen; + + sdflags = NGTCP2_STREAM_DATA_FLAG_NONE; + if ((strm->flags & NGTCP2_STRM_FLAG_SHUT_RD) && + rx_offset == strm->rx.last_offset) { + sdflags |= NGTCP2_STREAM_DATA_FLAG_FIN; + } + if (!handshake_completed) { + sdflags |= NGTCP2_STREAM_DATA_FLAG_EARLY; + } + + rv = conn_call_recv_stream_data(conn, strm, sdflags, offset, data, datalen); + if (rv != 0) { + return rv; + } + + ngtcp2_rob_pop(strm->rx.rob, rx_offset - datalen, datalen); + } +} + +/* + * conn_recv_crypto is called when CRYPTO frame |fr| is received. + * |rx_offset_base| is the offset in the entire TLS handshake stream. + * fr->offset specifies the offset in each encryption level. + * |max_rx_offset| is, if it is nonzero, the maximum offset in the + * entire TLS handshake stream that |fr| can carry. |crypto_level| is + * the encryption level where this data is received. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_PROTO + * CRYPTO frame has invalid offset. + * NGTCP2_ERR_NOMEM + * Out of memory. + * NGTCP2_ERR_CRYPTO + * TLS stack reported error. + * NGTCP2_ERR_FRAME_ENCODING + * The end offset exceeds the maximum value. + * NGTCP2_ERR_CALLBACK_FAILURE + * User-defined callback function failed. + */ +static int conn_recv_crypto(ngtcp2_conn *conn, ngtcp2_crypto_level crypto_level, + ngtcp2_strm *crypto, const ngtcp2_crypto *fr) { + uint64_t fr_end_offset; + uint64_t rx_offset; + int rv; + + if (fr->datacnt == 0) { + return 0; + } + + fr_end_offset = fr->offset + fr->data[0].len; + + if (NGTCP2_MAX_VARINT < fr_end_offset) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + rx_offset = ngtcp2_strm_rx_offset(crypto); + + if (fr_end_offset <= rx_offset) { + if (conn->server && + !(conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_EARLY_RETRANSMIT) && + crypto_level == NGTCP2_CRYPTO_LEVEL_INITIAL) { + /* recovery draft: Speeding Up Handshake Completion + + When a server receives an Initial packet containing duplicate + CRYPTO data, it can assume the client did not receive all of + the server's CRYPTO data sent in Initial packets, or the + client's estimated RTT is too small. ... To speed up + handshake completion under these conditions, an endpoint MAY + send a packet containing unacknowledged CRYPTO data earlier + than the PTO expiry, subject to address validation limits; + ... */ + conn->flags |= NGTCP2_CONN_FLAG_HANDSHAKE_EARLY_RETRANSMIT; + conn->in_pktns->rtb.probe_pkt_left = 1; + conn->hs_pktns->rtb.probe_pkt_left = 1; + } + return 0; + } + + crypto->rx.last_offset = ngtcp2_max(crypto->rx.last_offset, fr_end_offset); + + /* TODO Before dispatching incoming data to TLS stack, make sure + that previous data in previous encryption level has been + completely sent to TLS stack. Usually, if data is left, it is an + error because key is generated after consuming all data in the + previous encryption level. */ + if (fr->offset <= rx_offset) { + size_t ncut = (size_t)(rx_offset - fr->offset); + const uint8_t *data = fr->data[0].base + ncut; + size_t datalen = fr->data[0].len - ncut; + uint64_t offset = rx_offset; + + rx_offset += datalen; + rv = ngtcp2_strm_update_rx_offset(crypto, rx_offset); + if (rv != 0) { + return rv; + } + + rv = conn_call_recv_crypto_data(conn, crypto_level, offset, data, datalen); + if (rv != 0) { + return rv; + } + + rv = conn_emit_pending_crypto_data(conn, crypto_level, crypto, rx_offset); + if (rv != 0) { + return rv; + } + + return 0; + } + + if (fr_end_offset - rx_offset > NGTCP2_MAX_REORDERED_CRYPTO_DATA) { + return NGTCP2_ERR_CRYPTO_BUFFER_EXCEEDED; + } + + return ngtcp2_strm_recv_reordering(crypto, fr->data[0].base, fr->data[0].len, + fr->offset); +} + +/* + * conn_max_data_violated returns nonzero if receiving |datalen| + * violates connection flow control on local endpoint. + */ +static int conn_max_data_violated(ngtcp2_conn *conn, uint64_t datalen) { + return conn->rx.max_offset - conn->rx.offset < datalen; +} + +/* + * conn_recv_stream is called when STREAM frame |fr| is received. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_STREAM_STATE + * STREAM frame is received for a local stream which is not + * initiated; or STREAM frame is received for a local + * unidirectional stream + * NGTCP2_ERR_STREAM_LIMIT + * STREAM frame has remote stream ID which is strictly greater + * than the allowed limit. + * NGTCP2_ERR_NOMEM + * Out of memory. + * NGTCP2_ERR_CALLBACK_FAILURE + * User-defined callback function failed. + * NGTCP2_ERR_FLOW_CONTROL + * Flow control limit is violated; or the end offset of stream + * data is beyond the NGTCP2_MAX_VARINT. + * NGTCP2_ERR_FINAL_SIZE + * STREAM frame has strictly larger end offset than it is + * permitted. + */ +static int conn_recv_stream(ngtcp2_conn *conn, const ngtcp2_stream *fr) { + int rv; + ngtcp2_strm *strm; + ngtcp2_idtr *idtr; + uint64_t rx_offset, fr_end_offset; + int local_stream; + int bidi; + uint64_t datalen = ngtcp2_vec_len(fr->data, fr->datacnt); + uint32_t sdflags = NGTCP2_STREAM_DATA_FLAG_NONE; + + local_stream = conn_local_stream(conn, fr->stream_id); + bidi = bidi_stream(fr->stream_id); + + if (bidi) { + if (local_stream) { + if (conn->local.bidi.next_stream_id <= fr->stream_id) { + return NGTCP2_ERR_STREAM_STATE; + } + } else if (conn->remote.bidi.max_streams < + ngtcp2_ord_stream_id(fr->stream_id)) { + return NGTCP2_ERR_STREAM_LIMIT; + } + + idtr = &conn->remote.bidi.idtr; + } else { + if (local_stream) { + return NGTCP2_ERR_STREAM_STATE; + } + if (conn->remote.uni.max_streams < ngtcp2_ord_stream_id(fr->stream_id)) { + return NGTCP2_ERR_STREAM_LIMIT; + } + + idtr = &conn->remote.uni.idtr; + } + + if (NGTCP2_MAX_VARINT - datalen < fr->offset) { + return NGTCP2_ERR_FLOW_CONTROL; + } + + strm = ngtcp2_conn_find_stream(conn, fr->stream_id); + if (strm == NULL) { + if (local_stream) { + /* TODO The stream has been closed. This should be responded + with RESET_STREAM, or simply ignored. */ + return 0; + } + + rv = ngtcp2_idtr_open(idtr, fr->stream_id); + if (rv != 0) { + if (ngtcp2_err_is_fatal(rv)) { + return rv; + } + assert(rv == NGTCP2_ERR_STREAM_IN_USE); + /* TODO The stream has been closed. This should be responded + with RESET_STREAM, or simply ignored. */ + return 0; + } + + strm = ngtcp2_objalloc_strm_get(&conn->strm_objalloc); + if (strm == NULL) { + return NGTCP2_ERR_NOMEM; + } + /* TODO Perhaps, call new_stream callback? */ + rv = ngtcp2_conn_init_stream(conn, strm, fr->stream_id, NULL); + if (rv != 0) { + ngtcp2_objalloc_strm_release(&conn->strm_objalloc, strm); + return rv; + } + + if (!bidi) { + ngtcp2_strm_shutdown(strm, NGTCP2_STRM_FLAG_SHUT_WR); + strm->flags |= NGTCP2_STRM_FLAG_FIN_ACKED; + } + + rv = conn_call_stream_open(conn, strm); + if (rv != 0) { + return rv; + } + } + + fr_end_offset = fr->offset + datalen; + + if (strm->rx.max_offset < fr_end_offset) { + return NGTCP2_ERR_FLOW_CONTROL; + } + + if (strm->rx.last_offset < fr_end_offset) { + uint64_t len = fr_end_offset - strm->rx.last_offset; + + if (conn_max_data_violated(conn, len)) { + return NGTCP2_ERR_FLOW_CONTROL; + } + + conn->rx.offset += len; + + if (strm->flags & NGTCP2_STRM_FLAG_STOP_SENDING) { + ngtcp2_conn_extend_max_offset(conn, len); + } + } + + rx_offset = ngtcp2_strm_rx_offset(strm); + + if (fr->fin) { + if (strm->flags & NGTCP2_STRM_FLAG_SHUT_RD) { + if (strm->rx.last_offset != fr_end_offset) { + return NGTCP2_ERR_FINAL_SIZE; + } + + if (strm->flags & NGTCP2_STRM_FLAG_RECV_RST) { + return 0; + } + + if (rx_offset == fr_end_offset) { + return 0; + } + } else if (strm->rx.last_offset > fr_end_offset) { + return NGTCP2_ERR_FINAL_SIZE; + } else { + strm->rx.last_offset = fr_end_offset; + + ngtcp2_strm_shutdown(strm, NGTCP2_STRM_FLAG_SHUT_RD); + } + } else { + if ((strm->flags & NGTCP2_STRM_FLAG_SHUT_RD) && + strm->rx.last_offset < fr_end_offset) { + return NGTCP2_ERR_FINAL_SIZE; + } + + strm->rx.last_offset = ngtcp2_max(strm->rx.last_offset, fr_end_offset); + + if (fr_end_offset <= rx_offset) { + return 0; + } + + if (strm->flags & NGTCP2_STRM_FLAG_RECV_RST) { + return 0; + } + } + + if (fr->offset <= rx_offset) { + size_t ncut = (size_t)(rx_offset - fr->offset); + uint64_t offset = rx_offset; + const uint8_t *data; + int fin; + + if (fr->datacnt) { + data = fr->data[0].base + ncut; + datalen -= ncut; + + rx_offset += datalen; + rv = ngtcp2_strm_update_rx_offset(strm, rx_offset); + if (rv != 0) { + return rv; + } + } else { + data = NULL; + datalen = 0; + } + + if (strm->flags & NGTCP2_STRM_FLAG_STOP_SENDING) { + return ngtcp2_conn_close_stream_if_shut_rdwr(conn, strm); + } + + fin = (strm->flags & NGTCP2_STRM_FLAG_SHUT_RD) && + rx_offset == strm->rx.last_offset; + + if (fin || datalen) { + if (fin) { + sdflags |= NGTCP2_STREAM_DATA_FLAG_FIN; + } + if (!conn_is_handshake_completed(conn)) { + sdflags |= NGTCP2_STREAM_DATA_FLAG_EARLY; + } + rv = conn_call_recv_stream_data(conn, strm, sdflags, offset, data, + (size_t)datalen); + if (rv != 0) { + return rv; + } + + rv = conn_emit_pending_stream_data(conn, strm, rx_offset); + if (rv != 0) { + return rv; + } + } + } else if (fr->datacnt) { + rv = ngtcp2_strm_recv_reordering(strm, fr->data[0].base, fr->data[0].len, + fr->offset); + if (rv != 0) { + return rv; + } + } + return ngtcp2_conn_close_stream_if_shut_rdwr(conn, strm); +} + +/* + * conn_reset_stream adds RESET_STREAM frame to the transmission + * queue. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +static int conn_reset_stream(ngtcp2_conn *conn, ngtcp2_strm *strm, + uint64_t app_error_code) { + int rv; + ngtcp2_frame_chain *frc; + ngtcp2_pktns *pktns = &conn->pktns; + + rv = ngtcp2_frame_chain_objalloc_new(&frc, &conn->frc_objalloc); + if (rv != 0) { + return rv; + } + + frc->fr.type = NGTCP2_FRAME_RESET_STREAM; + frc->fr.reset_stream.stream_id = strm->stream_id; + frc->fr.reset_stream.app_error_code = app_error_code; + frc->fr.reset_stream.final_size = strm->tx.offset; + + /* TODO This prepends RESET_STREAM to pktns->tx.frq. */ + frc->next = pktns->tx.frq; + pktns->tx.frq = frc; + + return 0; +} + +/* + * conn_stop_sending adds STOP_SENDING frame to the transmission + * queue. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +static int conn_stop_sending(ngtcp2_conn *conn, ngtcp2_strm *strm, + uint64_t app_error_code) { + int rv; + ngtcp2_frame_chain *frc; + ngtcp2_pktns *pktns = &conn->pktns; + + rv = ngtcp2_frame_chain_objalloc_new(&frc, &conn->frc_objalloc); + if (rv != 0) { + return rv; + } + + frc->fr.type = NGTCP2_FRAME_STOP_SENDING; + frc->fr.stop_sending.stream_id = strm->stream_id; + frc->fr.stop_sending.app_error_code = app_error_code; + + /* TODO This prepends STOP_SENDING to pktns->tx.frq. */ + frc->next = pktns->tx.frq; + pktns->tx.frq = frc; + + return 0; +} + +/* + * handle_max_remote_streams_extension extends + * |*punsent_max_remote_streams| by |n| if a condition allows it. + */ +static void +handle_max_remote_streams_extension(uint64_t *punsent_max_remote_streams, + size_t n) { + if ( +#if SIZE_MAX > UINT32_MAX + NGTCP2_MAX_STREAMS < n || +#endif /* SIZE_MAX > UINT32_MAX */ + *punsent_max_remote_streams > (uint64_t)(NGTCP2_MAX_STREAMS - n)) { + *punsent_max_remote_streams = NGTCP2_MAX_STREAMS; + } else { + *punsent_max_remote_streams += n; + } +} + +/* + * conn_recv_reset_stream is called when RESET_STREAM |fr| is + * received. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_STREAM_STATE + * RESET_STREAM frame is received to the local stream which is not + * initiated. + * NGTCP2_ERR_STREAM_LIMIT + * RESET_STREAM frame has remote stream ID which is strictly + * greater than the allowed limit. + * NGTCP2_ERR_PROTO + * RESET_STREAM frame is received to the local unidirectional + * stream + * NGTCP2_ERR_NOMEM + * Out of memory. + * NGTCP2_ERR_CALLBACK_FAILURE + * User-defined callback function failed. + * NGTCP2_ERR_FLOW_CONTROL + * Flow control limit is violated; or the final size is beyond the + * NGTCP2_MAX_VARINT. + * NGTCP2_ERR_FINAL_SIZE + * The final offset is strictly larger than it is permitted. + */ +static int conn_recv_reset_stream(ngtcp2_conn *conn, + const ngtcp2_reset_stream *fr) { + ngtcp2_strm *strm; + int local_stream = conn_local_stream(conn, fr->stream_id); + int bidi = bidi_stream(fr->stream_id); + uint64_t datalen; + ngtcp2_idtr *idtr; + int rv; + + /* TODO share this piece of code */ + if (bidi) { + if (local_stream) { + if (conn->local.bidi.next_stream_id <= fr->stream_id) { + return NGTCP2_ERR_STREAM_STATE; + } + } else if (conn->remote.bidi.max_streams < + ngtcp2_ord_stream_id(fr->stream_id)) { + return NGTCP2_ERR_STREAM_LIMIT; + } + + idtr = &conn->remote.bidi.idtr; + } else { + if (local_stream) { + return NGTCP2_ERR_PROTO; + } + if (conn->remote.uni.max_streams < ngtcp2_ord_stream_id(fr->stream_id)) { + return NGTCP2_ERR_STREAM_LIMIT; + } + + idtr = &conn->remote.uni.idtr; + } + + if (NGTCP2_MAX_VARINT < fr->final_size) { + return NGTCP2_ERR_FLOW_CONTROL; + } + + strm = ngtcp2_conn_find_stream(conn, fr->stream_id); + if (strm == NULL) { + if (local_stream) { + return 0; + } + + rv = ngtcp2_idtr_open(idtr, fr->stream_id); + if (rv != 0) { + if (ngtcp2_err_is_fatal(rv)) { + return rv; + } + assert(rv == NGTCP2_ERR_STREAM_IN_USE); + return 0; + } + + if (conn_initial_stream_rx_offset(conn, fr->stream_id) < fr->final_size || + conn_max_data_violated(conn, fr->final_size)) { + return NGTCP2_ERR_FLOW_CONTROL; + } + + /* Stream is reset before we create ngtcp2_strm object. */ + conn->rx.offset += fr->final_size; + ngtcp2_conn_extend_max_offset(conn, fr->final_size); + + rv = conn_call_stream_reset(conn, fr->stream_id, fr->final_size, + fr->app_error_code, NULL); + if (rv != 0) { + return rv; + } + + /* There will be no activity in this stream because we got + RESET_STREAM and don't write stream data any further. This + effectively allows another new stream for peer. */ + if (bidi) { + handle_max_remote_streams_extension(&conn->remote.bidi.unsent_max_streams, + 1); + } else { + handle_max_remote_streams_extension(&conn->remote.uni.unsent_max_streams, + 1); + } + + return 0; + } + + if ((strm->flags & NGTCP2_STRM_FLAG_SHUT_RD)) { + if (strm->rx.last_offset != fr->final_size) { + return NGTCP2_ERR_FINAL_SIZE; + } + } else if (strm->rx.last_offset > fr->final_size) { + return NGTCP2_ERR_FINAL_SIZE; + } + + if (strm->flags & NGTCP2_STRM_FLAG_RECV_RST) { + return 0; + } + + if (strm->rx.max_offset < fr->final_size) { + return NGTCP2_ERR_FLOW_CONTROL; + } + + datalen = fr->final_size - strm->rx.last_offset; + + if (conn_max_data_violated(conn, datalen)) { + return NGTCP2_ERR_FLOW_CONTROL; + } + + rv = conn_call_stream_reset(conn, fr->stream_id, fr->final_size, + fr->app_error_code, strm->stream_user_data); + if (rv != 0) { + return rv; + } + + /* Extend connection flow control window for the amount of data + which are not passed to application. */ + if (!(strm->flags & NGTCP2_STRM_FLAG_STOP_SENDING)) { + ngtcp2_conn_extend_max_offset(conn, strm->rx.last_offset - + ngtcp2_strm_rx_offset(strm)); + } + + conn->rx.offset += datalen; + ngtcp2_conn_extend_max_offset(conn, datalen); + + strm->rx.last_offset = fr->final_size; + strm->flags |= NGTCP2_STRM_FLAG_SHUT_RD | NGTCP2_STRM_FLAG_RECV_RST; + + ngtcp2_strm_set_app_error_code(strm, fr->app_error_code); + + return ngtcp2_conn_close_stream_if_shut_rdwr(conn, strm); +} + +/* + * conn_recv_stop_sending is called when STOP_SENDING |fr| is received. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_STREAM_STATE + * STOP_SENDING frame is received for a local stream which is not + * initiated; or STOP_SENDING frame is received for a local + * unidirectional stream. + * NGTCP2_ERR_STREAM_LIMIT + * STOP_SENDING frame has remote stream ID which is strictly + * greater than the allowed limit. + * NGTCP2_ERR_NOMEM + * Out of memory. + * NGTCP2_ERR_CALLBACK_FAILURE + * User-defined callback function failed. + */ +static int conn_recv_stop_sending(ngtcp2_conn *conn, + const ngtcp2_stop_sending *fr) { + int rv; + ngtcp2_strm *strm; + ngtcp2_idtr *idtr; + int local_stream = conn_local_stream(conn, fr->stream_id); + int bidi = bidi_stream(fr->stream_id); + + if (bidi) { + if (local_stream) { + if (conn->local.bidi.next_stream_id <= fr->stream_id) { + return NGTCP2_ERR_STREAM_STATE; + } + } else if (conn->remote.bidi.max_streams < + ngtcp2_ord_stream_id(fr->stream_id)) { + return NGTCP2_ERR_STREAM_LIMIT; + } + + idtr = &conn->remote.bidi.idtr; + } else { + if (!local_stream || conn->local.uni.next_stream_id <= fr->stream_id) { + return NGTCP2_ERR_STREAM_STATE; + } + + idtr = &conn->remote.uni.idtr; + } + + strm = ngtcp2_conn_find_stream(conn, fr->stream_id); + if (strm == NULL) { + if (local_stream) { + return 0; + } + rv = ngtcp2_idtr_open(idtr, fr->stream_id); + if (rv != 0) { + if (ngtcp2_err_is_fatal(rv)) { + return rv; + } + assert(rv == NGTCP2_ERR_STREAM_IN_USE); + return 0; + } + + /* Frame is received reset before we create ngtcp2_strm + object. */ + strm = ngtcp2_objalloc_strm_get(&conn->strm_objalloc); + if (strm == NULL) { + return NGTCP2_ERR_NOMEM; + } + rv = ngtcp2_conn_init_stream(conn, strm, fr->stream_id, NULL); + if (rv != 0) { + ngtcp2_objalloc_strm_release(&conn->strm_objalloc, strm); + return rv; + } + + rv = conn_call_stream_open(conn, strm); + if (rv != 0) { + return rv; + } + } + + ngtcp2_strm_set_app_error_code(strm, fr->app_error_code); + + /* No RESET_STREAM is required if we have sent FIN and all data have + been acknowledged. */ + if (!ngtcp2_strm_is_all_tx_data_fin_acked(strm) && + !(strm->flags & NGTCP2_STRM_FLAG_SENT_RST)) { + rv = conn_reset_stream(conn, strm, fr->app_error_code); + if (rv != 0) { + return rv; + } + } + + strm->flags |= NGTCP2_STRM_FLAG_SHUT_WR | NGTCP2_STRM_FLAG_SENT_RST; + + if (ngtcp2_strm_is_tx_queued(strm) && !ngtcp2_strm_streamfrq_empty(strm)) { + assert(conn->tx.strmq_nretrans); + --conn->tx.strmq_nretrans; + } + + ngtcp2_strm_streamfrq_clear(strm); + + return ngtcp2_conn_close_stream_if_shut_rdwr(conn, strm); +} + +/* + * check_stateless_reset returns nonzero if Stateless Reset |sr| + * coming via |path| is valid against |dcid|. + */ +static int check_stateless_reset(const ngtcp2_dcid *dcid, + const ngtcp2_path *path, + const ngtcp2_pkt_stateless_reset *sr) { + return ngtcp2_path_eq(&dcid->ps.path, path) && + ngtcp2_dcid_verify_stateless_reset_token( + dcid, sr->stateless_reset_token) == 0; +} + +/* + * conn_on_stateless_reset decodes Stateless Reset from the buffer + * pointed by |payload| whose length is |payloadlen|. |payload| + * should start after first byte of packet. + * + * If Stateless Reset is decoded, and the Stateless Reset Token is + * validated, the connection is closed. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_INVALID_ARGUMENT + * Could not decode Stateless Reset; or Stateless Reset Token does + * not match; or No stateless reset token is available. + * NGTCP2_ERR_CALLBACK_FAILURE + * User callback failed. + */ +static int conn_on_stateless_reset(ngtcp2_conn *conn, const ngtcp2_path *path, + const uint8_t *payload, size_t payloadlen) { + int rv = 1; + ngtcp2_pv *pv = conn->pv; + ngtcp2_dcid *dcid; + ngtcp2_pkt_stateless_reset sr; + size_t len, i; + + rv = ngtcp2_pkt_decode_stateless_reset(&sr, payload, payloadlen); + if (rv != 0) { + return rv; + } + + if (!check_stateless_reset(&conn->dcid.current, path, &sr) && + (!pv || (!check_stateless_reset(&pv->dcid, path, &sr) && + (!(pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE) || + !check_stateless_reset(&pv->fallback_dcid, path, &sr))))) { + len = ngtcp2_ringbuf_len(&conn->dcid.retired.rb); + for (i = 0; i < len; ++i) { + dcid = ngtcp2_ringbuf_get(&conn->dcid.retired.rb, i); + if (check_stateless_reset(dcid, path, &sr)) { + break; + } + } + + if (i == len) { + len = ngtcp2_ringbuf_len(&conn->dcid.bound.rb); + for (i = 0; i < len; ++i) { + dcid = ngtcp2_ringbuf_get(&conn->dcid.bound.rb, i); + if (check_stateless_reset(dcid, path, &sr)) { + break; + } + } + + if (i == len) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + } + } + + conn->state = NGTCP2_CS_DRAINING; + + ngtcp2_log_rx_sr(&conn->log, &sr); + + ngtcp2_qlog_stateless_reset_pkt_received(&conn->qlog, &sr); + + return conn_call_recv_stateless_reset(conn, &sr); +} + +/* + * conn_recv_max_streams processes the incoming MAX_STREAMS frame + * |fr|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_CALLBACK_FAILURE + * User callback failed. + * NGTCP2_ERR_FRAME_ENCODING + * The maximum streams field exceeds the maximum value. + */ +static int conn_recv_max_streams(ngtcp2_conn *conn, + const ngtcp2_max_streams *fr) { + uint64_t n; + + if (fr->max_streams > NGTCP2_MAX_STREAMS) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + n = ngtcp2_min(fr->max_streams, NGTCP2_MAX_STREAMS); + + if (fr->type == NGTCP2_FRAME_MAX_STREAMS_BIDI) { + if (conn->local.bidi.max_streams < n) { + conn->local.bidi.max_streams = n; + return conn_call_extend_max_local_streams_bidi(conn, n); + } + return 0; + } + + if (conn->local.uni.max_streams < n) { + conn->local.uni.max_streams = n; + return conn_call_extend_max_local_streams_uni(conn, n); + } + return 0; +} + +static int conn_retire_dcid_prior_to(ngtcp2_conn *conn, ngtcp2_ringbuf *rb, + uint64_t retire_prior_to) { + size_t i; + ngtcp2_dcid *dcid, *last; + int rv; + + for (i = 0; i < ngtcp2_ringbuf_len(rb);) { + dcid = ngtcp2_ringbuf_get(rb, i); + if (dcid->seq >= retire_prior_to) { + ++i; + continue; + } + + rv = conn_retire_dcid_seq(conn, dcid->seq); + if (rv != 0) { + return rv; + } + + if (i == 0) { + ngtcp2_ringbuf_pop_front(rb); + continue; + } + + if (i == ngtcp2_ringbuf_len(rb) - 1) { + ngtcp2_ringbuf_pop_back(rb); + break; + } + + last = ngtcp2_ringbuf_get(rb, ngtcp2_ringbuf_len(rb) - 1); + ngtcp2_dcid_copy(dcid, last); + ngtcp2_ringbuf_pop_back(rb); + } + + return 0; +} + +/* + * conn_recv_new_connection_id processes the incoming + * NEW_CONNECTION_ID frame |fr|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_PROTO + * |fr| has the duplicated sequence number with different CID or + * token; or DCID is zero-length. + */ +static int conn_recv_new_connection_id(ngtcp2_conn *conn, + const ngtcp2_new_connection_id *fr) { + size_t i, len; + ngtcp2_dcid *dcid; + ngtcp2_pv *pv = conn->pv; + int rv; + int found = 0; + size_t extra_dcid = 0; + + if (conn->dcid.current.cid.datalen == 0) { + return NGTCP2_ERR_PROTO; + } + + if (fr->retire_prior_to > fr->seq) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + rv = ngtcp2_dcid_verify_uniqueness(&conn->dcid.current, fr->seq, &fr->cid, + fr->stateless_reset_token); + if (rv != 0) { + return rv; + } + if (ngtcp2_cid_eq(&conn->dcid.current.cid, &fr->cid)) { + found = 1; + } + + if (pv) { + rv = ngtcp2_dcid_verify_uniqueness(&pv->dcid, fr->seq, &fr->cid, + fr->stateless_reset_token); + if (rv != 0) { + return rv; + } + if (ngtcp2_cid_eq(&pv->dcid.cid, &fr->cid)) { + found = 1; + } + } + + len = ngtcp2_ringbuf_len(&conn->dcid.bound.rb); + + for (i = 0; i < len; ++i) { + dcid = ngtcp2_ringbuf_get(&conn->dcid.bound.rb, i); + rv = ngtcp2_dcid_verify_uniqueness(dcid, fr->seq, &fr->cid, + fr->stateless_reset_token); + if (rv != 0) { + return NGTCP2_ERR_PROTO; + } + if (ngtcp2_cid_eq(&dcid->cid, &fr->cid)) { + found = 1; + } + } + + len = ngtcp2_ringbuf_len(&conn->dcid.unused.rb); + + for (i = 0; i < len; ++i) { + dcid = ngtcp2_ringbuf_get(&conn->dcid.unused.rb, i); + rv = ngtcp2_dcid_verify_uniqueness(dcid, fr->seq, &fr->cid, + fr->stateless_reset_token); + if (rv != 0) { + return NGTCP2_ERR_PROTO; + } + if (ngtcp2_cid_eq(&dcid->cid, &fr->cid)) { + found = 1; + } + } + + if (conn->dcid.retire_prior_to < fr->retire_prior_to) { + conn->dcid.retire_prior_to = fr->retire_prior_to; + + rv = conn_retire_dcid_prior_to(conn, &conn->dcid.bound.rb, + fr->retire_prior_to); + if (rv != 0) { + return rv; + } + + rv = conn_retire_dcid_prior_to(conn, &conn->dcid.unused.rb, + conn->dcid.retire_prior_to); + if (rv != 0) { + return rv; + } + } else if (fr->seq < conn->dcid.retire_prior_to) { + /* If packets are reordered, we might have retire_prior_to which + is larger than fr->seq. + + A malicious peer might send crafted NEW_CONNECTION_ID to force + local endpoint to create lots of RETIRE_CONNECTION_ID frames. + For example, a peer might send seq = 50000 and retire_prior_to + = 50000. Then send NEW_CONNECTION_ID frames with seq < + 50000. */ + return conn_retire_dcid_seq(conn, fr->seq); + } + + if (found) { + return 0; + } + + if (ngtcp2_gaptr_is_pushed(&conn->dcid.seqgap, fr->seq, 1)) { + return 0; + } + + rv = ngtcp2_gaptr_push(&conn->dcid.seqgap, fr->seq, 1); + if (rv != 0) { + return rv; + } + + if (ngtcp2_ksl_len(&conn->dcid.seqgap.gap) > 32) { + ngtcp2_gaptr_drop_first_gap(&conn->dcid.seqgap); + } + + len = ngtcp2_ringbuf_len(&conn->dcid.unused.rb); + + if (conn->dcid.current.seq >= conn->dcid.retire_prior_to) { + ++extra_dcid; + } + if (pv) { + if (pv->dcid.seq != conn->dcid.current.seq && + pv->dcid.seq >= conn->dcid.retire_prior_to) { + ++extra_dcid; + } + if ((pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE) && + pv->fallback_dcid.seq != conn->dcid.current.seq && + pv->fallback_dcid.seq >= conn->dcid.retire_prior_to) { + ++extra_dcid; + } + } + + if (conn->local.transport_params.active_connection_id_limit <= + len + extra_dcid) { + return NGTCP2_ERR_CONNECTION_ID_LIMIT; + } + + if (len >= NGTCP2_MAX_DCID_POOL_SIZE) { + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, "too many connection ID"); + return 0; + } + + dcid = ngtcp2_ringbuf_push_back(&conn->dcid.unused.rb); + ngtcp2_dcid_init(dcid, fr->seq, &fr->cid, fr->stateless_reset_token); + + return 0; +} + +/* + * conn_post_process_recv_new_connection_id handles retirement request + * of active DCIDs. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + * NGTCP2_ERR_CALLBACK_FAILURE + * User-defined callback function failed. + */ +static int conn_post_process_recv_new_connection_id(ngtcp2_conn *conn, + ngtcp2_tstamp ts) { + ngtcp2_pv *pv = conn->pv; + ngtcp2_dcid *dcid; + int rv; + + if (conn->dcid.current.seq < conn->dcid.retire_prior_to) { + if (ngtcp2_ringbuf_len(&conn->dcid.unused.rb) == 0) { + return 0; + } + + rv = conn_retire_dcid(conn, &conn->dcid.current, ts); + if (rv != 0) { + return rv; + } + + dcid = ngtcp2_ringbuf_get(&conn->dcid.unused.rb, 0); + if (pv) { + if (conn->dcid.current.seq == pv->dcid.seq) { + ngtcp2_dcid_copy_cid_token(&pv->dcid, dcid); + } + if ((pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE) && + conn->dcid.current.seq == pv->fallback_dcid.seq) { + ngtcp2_dcid_copy_cid_token(&pv->fallback_dcid, dcid); + } + } + + ngtcp2_dcid_copy_cid_token(&conn->dcid.current, dcid); + ngtcp2_ringbuf_pop_front(&conn->dcid.unused.rb); + + rv = conn_call_activate_dcid(conn, &conn->dcid.current); + if (rv != 0) { + return rv; + } + } + + if (pv) { + if (pv->dcid.seq < conn->dcid.retire_prior_to) { + if (ngtcp2_ringbuf_len(&conn->dcid.unused.rb)) { + rv = conn_retire_dcid(conn, &pv->dcid, ts); + if (rv != 0) { + return rv; + } + + dcid = ngtcp2_ringbuf_get(&conn->dcid.unused.rb, 0); + + if ((pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE) && + pv->dcid.seq == pv->fallback_dcid.seq) { + ngtcp2_dcid_copy_cid_token(&pv->fallback_dcid, dcid); + } + + ngtcp2_dcid_copy_cid_token(&pv->dcid, dcid); + ngtcp2_ringbuf_pop_front(&conn->dcid.unused.rb); + + rv = conn_call_activate_dcid(conn, &pv->dcid); + if (rv != 0) { + return rv; + } + } else { + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PTV, + "path migration is aborted because connection ID is" + "retired and no unused connection ID is available"); + + return conn_abort_pv(conn, ts); + } + } + if ((pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE) && + pv->fallback_dcid.seq < conn->dcid.retire_prior_to) { + if (ngtcp2_ringbuf_len(&conn->dcid.unused.rb)) { + rv = conn_retire_dcid(conn, &pv->fallback_dcid, ts); + if (rv != 0) { + return rv; + } + + dcid = ngtcp2_ringbuf_get(&conn->dcid.unused.rb, 0); + ngtcp2_dcid_copy_cid_token(&pv->fallback_dcid, dcid); + ngtcp2_ringbuf_pop_front(&conn->dcid.unused.rb); + + rv = conn_call_activate_dcid(conn, &pv->fallback_dcid); + if (rv != 0) { + return rv; + } + } else { + /* Now we have no fallback dcid. */ + return conn_abort_pv(conn, ts); + } + } + } + + return 0; +} + +/* + * conn_recv_retire_connection_id processes the incoming + * RETIRE_CONNECTION_ID frame |fr|. |hd| is a packet header which + * |fr| is included. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + * NGTCP2_ERR_PROTO + * SCID is zero-length. + * NGTCP2_ERR_FRAME_ENCODING + * Attempt to retire CID which is used as DCID to send this frame. + */ +static int conn_recv_retire_connection_id(ngtcp2_conn *conn, + const ngtcp2_pkt_hd *hd, + const ngtcp2_retire_connection_id *fr, + ngtcp2_tstamp ts) { + ngtcp2_ksl_it it; + ngtcp2_scid *scid; + + if (conn->oscid.datalen == 0 || conn->scid.last_seq < fr->seq) { + return NGTCP2_ERR_PROTO; + } + + for (it = ngtcp2_ksl_begin(&conn->scid.set); !ngtcp2_ksl_it_end(&it); + ngtcp2_ksl_it_next(&it)) { + scid = ngtcp2_ksl_it_get(&it); + if (scid->seq == fr->seq) { + if (ngtcp2_cid_eq(&scid->cid, &hd->dcid)) { + return NGTCP2_ERR_PROTO; + } + + if (!(scid->flags & NGTCP2_SCID_FLAG_RETIRED)) { + scid->flags |= NGTCP2_SCID_FLAG_RETIRED; + ++conn->scid.num_retired; + } + + if (scid->pe.index != NGTCP2_PQ_BAD_INDEX) { + ngtcp2_pq_remove(&conn->scid.used, &scid->pe); + scid->pe.index = NGTCP2_PQ_BAD_INDEX; + } + + scid->retired_ts = ts; + + return ngtcp2_pq_push(&conn->scid.used, &scid->pe); + } + } + + return 0; +} + +/* + * conn_recv_new_token processes the incoming NEW_TOKEN frame |fr|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_FRAME_ENCODING + * Token is empty + * NGTCP2_ERR_PROTO: + * Server received NEW_TOKEN. + */ +static int conn_recv_new_token(ngtcp2_conn *conn, const ngtcp2_new_token *fr) { + if (conn->server) { + return NGTCP2_ERR_PROTO; + } + + if (fr->tokenlen == 0) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + return conn_call_recv_new_token(conn, fr->token, fr->tokenlen); +} + +/* + * conn_recv_streams_blocked_bidi processes the incoming + * STREAMS_BLOCKED (0x16). + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_FRAME_ENCODING + * Maximum Streams is larger than advertised value. + */ +static int conn_recv_streams_blocked_bidi(ngtcp2_conn *conn, + ngtcp2_streams_blocked *fr) { + if (fr->max_streams > conn->remote.bidi.max_streams) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + return 0; +} + +/* + * conn_recv_streams_blocked_uni processes the incoming + * STREAMS_BLOCKED (0x17). + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_FRAME_ENCODING + * Maximum Streams is larger than advertised value. + */ +static int conn_recv_streams_blocked_uni(ngtcp2_conn *conn, + ngtcp2_streams_blocked *fr) { + if (fr->max_streams > conn->remote.uni.max_streams) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + return 0; +} + +/* + * conn_select_preferred_addr asks a client application to select a + * server address from preferred addresses received from server. If a + * client chooses the address, path validation will start. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + * NGTCP2_ERR_CALLBACK_FAILURE + * User-defined callback function failed. + */ +static int conn_select_preferred_addr(ngtcp2_conn *conn) { + ngtcp2_path_storage ps; + int rv; + ngtcp2_duration pto, initial_pto, timeout; + ngtcp2_pv *pv; + ngtcp2_dcid *dcid; + + if (ngtcp2_ringbuf_len(&conn->dcid.unused.rb) == 0) { + return 0; + } + + ngtcp2_path_storage_zero(&ps); + ngtcp2_addr_copy(&ps.path.local, &conn->dcid.current.ps.path.local); + + rv = conn_call_select_preferred_addr(conn, &ps.path); + if (rv != 0) { + return rv; + } + + if (ps.path.remote.addrlen == 0 || + ngtcp2_addr_eq(&conn->dcid.current.ps.path.remote, &ps.path.remote)) { + return 0; + } + + assert(conn->pv == NULL); + + dcid = ngtcp2_ringbuf_get(&conn->dcid.unused.rb, 0); + ngtcp2_dcid_set_path(dcid, &ps.path); + + pto = conn_compute_pto(conn, &conn->pktns); + initial_pto = conn_compute_initial_pto(conn, &conn->pktns); + timeout = 3 * ngtcp2_max(pto, initial_pto); + + rv = ngtcp2_pv_new(&pv, dcid, timeout, NGTCP2_PV_FLAG_PREFERRED_ADDR, + &conn->log, conn->mem); + if (rv != 0) { + /* TODO Call ngtcp2_dcid_free here if it is introduced */ + return rv; + } + + ngtcp2_ringbuf_pop_front(&conn->dcid.unused.rb); + conn->pv = pv; + + return conn_call_activate_dcid(conn, &pv->dcid); +} + +/* + * conn_recv_handshake_done processes the incoming HANDSHAKE_DONE + * frame |fr|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_PROTO + * Server received HANDSHAKE_DONE frame. + * NGTCP2_ERR_NOMEM + * Out of memory. + * NGTCP2_ERR_CALLBACK_FAILURE + * User-defined callback function failed. + */ +static int conn_recv_handshake_done(ngtcp2_conn *conn, ngtcp2_tstamp ts) { + int rv; + + if (conn->server) { + return NGTCP2_ERR_PROTO; + } + + if (conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_CONFIRMED) { + return 0; + } + + conn->flags |= NGTCP2_CONN_FLAG_HANDSHAKE_CONFIRMED | + NGTCP2_CONN_FLAG_SERVER_ADDR_VERIFIED; + + conn->pktns.rtb.persistent_congestion_start_ts = ts; + + conn_discard_handshake_state(conn, ts); + + assert(conn->remote.transport_params); + + if (conn->remote.transport_params->preferred_address_present) { + rv = conn_select_preferred_addr(conn); + if (rv != 0) { + return rv; + } + } + + rv = conn_call_handshake_confirmed(conn); + if (rv != 0) { + return rv; + } + + /* Re-arm loss detection timer after handshake has been + confirmed. */ + ngtcp2_conn_set_loss_detection_timer(conn, ts); + + return 0; +} + +/* + * conn_recv_datagram processes the incoming DATAGRAM frame |fr|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_CALLBACK_FAILURE + * User-defined callback function failed. + */ +static int conn_recv_datagram(ngtcp2_conn *conn, ngtcp2_datagram *fr) { + assert(conn->local.transport_params.max_datagram_frame_size); + + return conn_call_recv_datagram(conn, fr); +} + +/* + * conn_key_phase_changed returns nonzero if |hd| indicates that the + * key phase has unexpected value. + */ +static int conn_key_phase_changed(ngtcp2_conn *conn, const ngtcp2_pkt_hd *hd) { + ngtcp2_pktns *pktns = &conn->pktns; + + return !(pktns->crypto.rx.ckm->flags & NGTCP2_CRYPTO_KM_FLAG_KEY_PHASE_ONE) ^ + !(hd->flags & NGTCP2_PKT_FLAG_KEY_PHASE); +} + +/* + * conn_prepare_key_update installs new updated keys. + */ +static int conn_prepare_key_update(ngtcp2_conn *conn, ngtcp2_tstamp ts) { + int rv; + ngtcp2_tstamp confirmed_ts = conn->crypto.key_update.confirmed_ts; + ngtcp2_duration pto = conn_compute_pto(conn, &conn->pktns); + ngtcp2_pktns *pktns = &conn->pktns; + ngtcp2_crypto_km *rx_ckm = pktns->crypto.rx.ckm; + ngtcp2_crypto_km *tx_ckm = pktns->crypto.tx.ckm; + ngtcp2_crypto_km *new_rx_ckm, *new_tx_ckm; + ngtcp2_crypto_aead_ctx rx_aead_ctx = {0}, tx_aead_ctx = {0}; + size_t secretlen, ivlen; + + if ((conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_CONFIRMED) && + tx_ckm->use_count >= pktns->crypto.ctx.max_encryption && + ngtcp2_conn_initiate_key_update(conn, ts) != 0) { + return NGTCP2_ERR_AEAD_LIMIT_REACHED; + } + + if ((conn->flags & NGTCP2_CONN_FLAG_KEY_UPDATE_NOT_CONFIRMED) || + (confirmed_ts != UINT64_MAX && confirmed_ts + pto > ts)) { + return 0; + } + + if (conn->crypto.key_update.new_rx_ckm || + conn->crypto.key_update.new_tx_ckm) { + assert(conn->crypto.key_update.new_rx_ckm); + assert(conn->crypto.key_update.new_tx_ckm); + return 0; + } + + secretlen = rx_ckm->secret.len; + ivlen = rx_ckm->iv.len; + + rv = ngtcp2_crypto_km_nocopy_new(&conn->crypto.key_update.new_rx_ckm, + secretlen, ivlen, conn->mem); + if (rv != 0) { + return rv; + } + + rv = ngtcp2_crypto_km_nocopy_new(&conn->crypto.key_update.new_tx_ckm, + secretlen, ivlen, conn->mem); + if (rv != 0) { + return rv; + } + + new_rx_ckm = conn->crypto.key_update.new_rx_ckm; + new_tx_ckm = conn->crypto.key_update.new_tx_ckm; + + rv = conn_call_update_key( + conn, new_rx_ckm->secret.base, new_tx_ckm->secret.base, &rx_aead_ctx, + new_rx_ckm->iv.base, &tx_aead_ctx, new_tx_ckm->iv.base, + rx_ckm->secret.base, tx_ckm->secret.base, secretlen); + if (rv != 0) { + return rv; + } + + new_rx_ckm->aead_ctx = rx_aead_ctx; + new_tx_ckm->aead_ctx = tx_aead_ctx; + + if (!(rx_ckm->flags & NGTCP2_CRYPTO_KM_FLAG_KEY_PHASE_ONE)) { + new_rx_ckm->flags |= NGTCP2_CRYPTO_KM_FLAG_KEY_PHASE_ONE; + new_tx_ckm->flags |= NGTCP2_CRYPTO_KM_FLAG_KEY_PHASE_ONE; + } + + if (conn->crypto.key_update.old_rx_ckm) { + conn_call_delete_crypto_aead_ctx( + conn, &conn->crypto.key_update.old_rx_ckm->aead_ctx); + ngtcp2_crypto_km_del(conn->crypto.key_update.old_rx_ckm, conn->mem); + conn->crypto.key_update.old_rx_ckm = NULL; + } + + return 0; +} + +/* + * conn_rotate_keys rotates keys. The current key moves to old key, + * and new key moves to the current key. If the local endpoint + * initiated this key update, pass nonzero as |initiator|. + */ +static void conn_rotate_keys(ngtcp2_conn *conn, int64_t pkt_num, + int initiator) { + ngtcp2_pktns *pktns = &conn->pktns; + + assert(conn->crypto.key_update.new_rx_ckm); + assert(conn->crypto.key_update.new_tx_ckm); + assert(!conn->crypto.key_update.old_rx_ckm); + assert(!(conn->flags & NGTCP2_CONN_FLAG_PPE_PENDING)); + + conn->crypto.key_update.old_rx_ckm = pktns->crypto.rx.ckm; + + pktns->crypto.rx.ckm = conn->crypto.key_update.new_rx_ckm; + conn->crypto.key_update.new_rx_ckm = NULL; + pktns->crypto.rx.ckm->pkt_num = pkt_num; + + assert(pktns->crypto.tx.ckm); + + conn_call_delete_crypto_aead_ctx(conn, &pktns->crypto.tx.ckm->aead_ctx); + ngtcp2_crypto_km_del(pktns->crypto.tx.ckm, conn->mem); + + pktns->crypto.tx.ckm = conn->crypto.key_update.new_tx_ckm; + conn->crypto.key_update.new_tx_ckm = NULL; + pktns->crypto.tx.ckm->pkt_num = pktns->tx.last_pkt_num + 1; + + conn->flags |= NGTCP2_CONN_FLAG_KEY_UPDATE_NOT_CONFIRMED; + if (initiator) { + conn->flags |= NGTCP2_CONN_FLAG_KEY_UPDATE_INITIATOR; + } +} + +/* + * conn_path_validation_in_progress returns nonzero if path validation + * against |path| is underway. + */ +static int conn_path_validation_in_progress(ngtcp2_conn *conn, + const ngtcp2_path *path) { + ngtcp2_pv *pv = conn->pv; + + return pv && ngtcp2_path_eq(&pv->dcid.ps.path, path); +} + +/* + * conn_recv_non_probing_pkt_on_new_path is called when non-probing + * packet is received via new path. It starts path validation against + * the new path. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_CONN_ID_BLOCKED + * No DCID is available + * NGTCP2_ERR_NOMEM + * Out of memory + */ +static int conn_recv_non_probing_pkt_on_new_path(ngtcp2_conn *conn, + const ngtcp2_path *path, + size_t dgramlen, + int new_cid_used, + ngtcp2_tstamp ts) { + + ngtcp2_dcid dcid, *bound_dcid, *last; + ngtcp2_pv *pv; + int rv; + ngtcp2_duration pto, initial_pto, timeout; + int require_new_cid; + int local_addr_eq; + uint32_t remote_addr_cmp; + size_t len, i; + + assert(conn->server); + + if (conn->pv && (conn->pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE) && + ngtcp2_path_eq(&conn->pv->fallback_dcid.ps.path, path)) { + /* If new path equals fallback path, that means connection + migrated back to the original path. Fallback path is + considered to be validated. */ + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PTV, + "path is migrated back to the original path"); + ngtcp2_dcid_copy(&conn->dcid.current, &conn->pv->fallback_dcid); + conn_reset_congestion_state(conn, ts); + conn->dcid.current.bytes_recv += dgramlen; + conn_reset_ecn_validation_state(conn); + + rv = conn_abort_pv(conn, ts); + if (rv != 0) { + return rv; + } + + /* Run PMTUD just in case if it is prematurely aborted */ + assert(!conn->pmtud); + + return conn_start_pmtud(conn); + } + + remote_addr_cmp = + ngtcp2_addr_compare(&conn->dcid.current.ps.path.remote, &path->remote); + local_addr_eq = + ngtcp2_addr_eq(&conn->dcid.current.ps.path.local, &path->local); + + /* + * When to change DCID? RFC 9002 section 9.5 says: + * + * An endpoint MUST NOT reuse a connection ID when sending from more + * than one local address -- for example, when initiating connection + * migration as described in Section 9.2 or when probing a new + * network path as described in Section 9.1. + * + * Similarly, an endpoint MUST NOT reuse a connection ID when + * sending to more than one destination address. Due to network + * changes outside the control of its peer, an endpoint might + * receive packets from a new source address with the same + * Destination Connection ID field value, in which case it MAY + * continue to use the current connection ID with the new remote + * address while still sending from the same local address. + */ + require_new_cid = conn->dcid.current.cid.datalen && + ((new_cid_used && remote_addr_cmp) || !local_addr_eq); + + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, + "non-probing packet was received from new remote address"); + + pto = conn_compute_pto(conn, &conn->pktns); + initial_pto = conn_compute_initial_pto(conn, &conn->pktns); + timeout = 3 * ngtcp2_max(pto, initial_pto); + + len = ngtcp2_ringbuf_len(&conn->dcid.bound.rb); + + for (i = 0; i < len; ++i) { + bound_dcid = ngtcp2_ringbuf_get(&conn->dcid.bound.rb, i); + if (ngtcp2_path_eq(&bound_dcid->ps.path, path)) { + ngtcp2_log_info( + &conn->log, NGTCP2_LOG_EVENT_CON, + "Found DCID which has already been bound to the new path"); + + ngtcp2_dcid_copy(&dcid, bound_dcid); + if (i == 0) { + ngtcp2_ringbuf_pop_front(&conn->dcid.bound.rb); + } else if (i == ngtcp2_ringbuf_len(&conn->dcid.bound.rb) - 1) { + ngtcp2_ringbuf_pop_back(&conn->dcid.bound.rb); + } else { + last = ngtcp2_ringbuf_get(&conn->dcid.bound.rb, len - 1); + ngtcp2_dcid_copy(bound_dcid, last); + ngtcp2_ringbuf_pop_back(&conn->dcid.bound.rb); + } + require_new_cid = 0; + + if (dcid.cid.datalen) { + rv = conn_call_activate_dcid(conn, &dcid); + if (rv != 0) { + return rv; + } + } + break; + } + } + + if (i == len) { + if (require_new_cid) { + if (ngtcp2_ringbuf_len(&conn->dcid.unused.rb) == 0) { + return NGTCP2_ERR_CONN_ID_BLOCKED; + } + ngtcp2_dcid_copy(&dcid, ngtcp2_ringbuf_get(&conn->dcid.unused.rb, 0)); + ngtcp2_ringbuf_pop_front(&conn->dcid.unused.rb); + + rv = conn_call_activate_dcid(conn, &dcid); + if (rv != 0) { + return rv; + } + } else { + /* Use the current DCID if a remote endpoint does not change + DCID. */ + ngtcp2_dcid_copy(&dcid, &conn->dcid.current); + dcid.bytes_sent = 0; + dcid.bytes_recv = 0; + dcid.flags &= (uint8_t)~NGTCP2_DCID_FLAG_PATH_VALIDATED; + } + } + + ngtcp2_dcid_set_path(&dcid, path); + dcid.bytes_recv += dgramlen; + + rv = ngtcp2_pv_new(&pv, &dcid, timeout, NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE, + &conn->log, conn->mem); + if (rv != 0) { + return rv; + } + + if (conn->pv && (conn->pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE)) { + ngtcp2_dcid_copy(&pv->fallback_dcid, &conn->pv->fallback_dcid); + pv->fallback_pto = conn->pv->fallback_pto; + /* Unset the flag bit so that conn_stop_pv does not retire + DCID. */ + conn->pv->flags &= (uint8_t)~NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE; + } else { + ngtcp2_dcid_copy(&pv->fallback_dcid, &conn->dcid.current); + pv->fallback_pto = pto; + } + + if (!local_addr_eq || (remote_addr_cmp & (NGTCP2_ADDR_COMPARE_FLAG_ADDR | + NGTCP2_ADDR_COMPARE_FLAG_FAMILY))) { + conn_reset_congestion_state(conn, ts); + } else { + /* For NAT rebinding, keep max_udp_payload_size since client most + likely does not send a padded PATH_CHALLENGE. */ + dcid.max_udp_payload_size = ngtcp2_max( + dcid.max_udp_payload_size, conn->dcid.current.max_udp_payload_size); + } + + ngtcp2_dcid_copy(&conn->dcid.current, &dcid); + + conn_reset_ecn_validation_state(conn); + + ngtcp2_conn_stop_pmtud(conn); + + if (conn->pv) { + ngtcp2_log_info( + &conn->log, NGTCP2_LOG_EVENT_PTV, + "path migration is aborted because new migration has started"); + rv = conn_abort_pv(conn, ts); + if (rv != 0) { + return rv; + } + } + + conn->pv = pv; + + return 0; +} + +/* + * conn_recv_pkt_from_new_path is called when a 1RTT packet is + * received from new path (not current path). This packet would be a + * packet which only contains probing frame, or reordered packet, or a + * path is being validated. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_CONN_ID_BLOCKED + * No unused DCID is available + * NGTCP2_ERR_NOMEM + * Out of memory + */ +static int conn_recv_pkt_from_new_path(ngtcp2_conn *conn, + const ngtcp2_path *path, size_t dgramlen, + int path_challenge_recved, + ngtcp2_tstamp ts) { + ngtcp2_pv *pv = conn->pv; + ngtcp2_dcid *bound_dcid; + int rv; + + if (pv) { + if (ngtcp2_path_eq(&pv->dcid.ps.path, path)) { + pv->dcid.bytes_recv += dgramlen; + return 0; + } + + if ((pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE) && + ngtcp2_path_eq(&pv->fallback_dcid.ps.path, path)) { + pv->fallback_dcid.bytes_recv += dgramlen; + return 0; + } + } + + if (!path_challenge_recved) { + return 0; + } + + rv = conn_bind_dcid(conn, &bound_dcid, path, ts); + if (rv != 0) { + return rv; + } + + ngtcp2_dcid_set_path(bound_dcid, path); + bound_dcid->bytes_recv += dgramlen; + + return 0; +} + +/* + * conn_recv_delayed_handshake_pkt processes the received Handshake + * packet which is received after handshake completed. This function + * does the minimal job, and its purpose is send acknowledgement of + * this packet to the peer. We assume that hd->type == + * NGTCP2_PKT_HANDSHAKE. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_FRAME_ENCODING + * Frame is badly formatted; or frame type is unknown. + * NGTCP2_ERR_NOMEM + * Out of memory + * NGTCP2_ERR_DISCARD_PKT + * Packet was discarded. + * NGTCP2_ERR_ACK_FRAME + * ACK frame is malformed. + * NGTCP2_ERR_PROTO + * Frame that is not allowed in Handshake packet is received. + */ +static int +conn_recv_delayed_handshake_pkt(ngtcp2_conn *conn, const ngtcp2_pkt_info *pi, + const ngtcp2_pkt_hd *hd, size_t pktlen, + const uint8_t *payload, size_t payloadlen, + ngtcp2_tstamp pkt_ts, ngtcp2_tstamp ts) { + ngtcp2_ssize nread; + ngtcp2_max_frame mfr; + ngtcp2_frame *fr = &mfr.fr; + int rv; + int require_ack = 0; + ngtcp2_pktns *pktns; + + assert(hd->type == NGTCP2_PKT_HANDSHAKE); + + pktns = conn->hs_pktns; + + if (payloadlen == 0) { + /* QUIC packet must contain at least one frame */ + return NGTCP2_ERR_PROTO; + } + + ngtcp2_qlog_pkt_received_start(&conn->qlog); + + for (; payloadlen;) { + nread = ngtcp2_pkt_decode_frame(fr, payload, payloadlen); + if (nread < 0) { + return (int)nread; + } + + payload += nread; + payloadlen -= (size_t)nread; + + switch (fr->type) { + case NGTCP2_FRAME_ACK: + case NGTCP2_FRAME_ACK_ECN: + fr->ack.ack_delay = 0; + fr->ack.ack_delay_unscaled = 0; + break; + } + + ngtcp2_log_rx_fr(&conn->log, hd, fr); + + switch (fr->type) { + case NGTCP2_FRAME_ACK: + case NGTCP2_FRAME_ACK_ECN: + if (!conn->server) { + conn->flags |= NGTCP2_CONN_FLAG_SERVER_ADDR_VERIFIED; + } + rv = conn_recv_ack(conn, pktns, &fr->ack, pkt_ts, ts); + if (rv != 0) { + return rv; + } + break; + case NGTCP2_FRAME_PADDING: + break; + case NGTCP2_FRAME_CONNECTION_CLOSE: + rv = conn_recv_connection_close(conn, &fr->connection_close); + if (rv != 0) { + return rv; + } + break; + case NGTCP2_FRAME_CRYPTO: + case NGTCP2_FRAME_PING: + require_ack = 1; + break; + default: + return NGTCP2_ERR_PROTO; + } + + ngtcp2_qlog_write_frame(&conn->qlog, fr); + } + + ngtcp2_qlog_pkt_received_end(&conn->qlog, hd, pktlen); + + rv = pktns_commit_recv_pkt_num(pktns, hd->pkt_num, require_ack, pkt_ts); + if (rv != 0) { + return rv; + } + + pktns_increase_ecn_counts(pktns, pi); + + if (require_ack && + (++pktns->acktr.rx_npkt >= conn->local.settings.ack_thresh || + (pi->ecn & NGTCP2_ECN_MASK) == NGTCP2_ECN_CE)) { + ngtcp2_acktr_immediate_ack(&pktns->acktr); + } + + rv = ngtcp2_conn_sched_ack(conn, &pktns->acktr, hd->pkt_num, require_ack, + pkt_ts); + if (rv != 0) { + return rv; + } + + conn_restart_timer_on_read(conn, ts); + + ngtcp2_qlog_metrics_updated(&conn->qlog, &conn->cstat); + + return 0; +} + +/* + * conn_allow_path_change_under_disable_active_migration returns + * nonzero if a packet from |path| is acceptable under + * disable_active_migration is on. + */ +static int +conn_allow_path_change_under_disable_active_migration(ngtcp2_conn *conn, + const ngtcp2_path *path) { + uint32_t remote_addr_cmp; + const ngtcp2_preferred_addr *paddr; + ngtcp2_addr addr; + + assert(conn->server); + assert(conn->local.transport_params.disable_active_migration); + + /* If local address does not change, it must be passive migration + (NAT rebinding). */ + if (ngtcp2_addr_eq(&conn->dcid.current.ps.path.local, &path->local)) { + remote_addr_cmp = + ngtcp2_addr_compare(&conn->dcid.current.ps.path.remote, &path->remote); + + return (remote_addr_cmp | NGTCP2_ADDR_COMPARE_FLAG_PORT) == + NGTCP2_ADDR_COMPARE_FLAG_PORT; + } + + /* If local address changes, it must be one of the preferred + addresses. */ + + if (!conn->local.transport_params.preferred_address_present) { + return 0; + } + + paddr = &conn->local.transport_params.preferred_address; + + if (paddr->ipv4_present) { + ngtcp2_addr_init(&addr, (const ngtcp2_sockaddr *)&paddr->ipv4, + sizeof(paddr->ipv4)); + + if (ngtcp2_addr_eq(&addr, &path->local)) { + return 1; + } + } + + if (paddr->ipv6_present) { + ngtcp2_addr_init(&addr, (const ngtcp2_sockaddr *)&paddr->ipv6, + sizeof(paddr->ipv6)); + + if (ngtcp2_addr_eq(&addr, &path->local)) { + return 1; + } + } + + return 0; +} + +/* + * conn_recv_pkt processes a packet contained in the buffer pointed by + * |pkt| of length |pktlen|. |pkt| may contain multiple QUIC packets. + * This function only processes the first packet. |pkt_ts| is the + * timestamp when packet is received. |ts| should be the current + * time. Usually they are the same, but for buffered packets, + * |pkt_ts| would be earlier than |ts|. + * + * This function returns the number of bytes processed if it succeeds, + * or one of the following negative error codes: + * + * NGTCP2_ERR_DISCARD_PKT + * Packet was discarded because plain text header was malformed; + * or its payload could not be decrypted. + * NGTCP2_ERR_PROTO + * Packet is badly formatted; or 0RTT packet contains other than + * PADDING or STREAM frames; or other QUIC protocol violation is + * found. + * NGTCP2_ERR_CALLBACK_FAILURE + * User-defined callback function failed. + * NGTCP2_ERR_NOMEM + * Out of memory. + * NGTCP2_ERR_FRAME_ENCODING + * Frame is badly formatted; or frame type is unknown. + * NGTCP2_ERR_ACK_FRAME + * ACK frame is malformed. + * NGTCP2_ERR_STREAM_STATE + * Frame is received to the local stream which is not initiated. + * NGTCP2_ERR_STREAM_LIMIT + * Frame has remote stream ID which is strictly greater than the + * allowed limit. + * NGTCP2_ERR_FLOW_CONTROL + * Flow control limit is violated. + * NGTCP2_ERR_FINAL_SIZE + * Frame has strictly larger end offset than it is permitted. + */ +static ngtcp2_ssize conn_recv_pkt(ngtcp2_conn *conn, const ngtcp2_path *path, + const ngtcp2_pkt_info *pi, const uint8_t *pkt, + size_t pktlen, size_t dgramlen, + ngtcp2_tstamp pkt_ts, ngtcp2_tstamp ts) { + ngtcp2_pkt_hd hd; + int rv = 0; + size_t hdpktlen; + const uint8_t *payload; + size_t payloadlen; + ngtcp2_ssize nread, nwrite; + ngtcp2_max_frame mfr; + ngtcp2_frame *fr = &mfr.fr; + int require_ack = 0; + ngtcp2_crypto_aead *aead; + ngtcp2_crypto_cipher *hp; + ngtcp2_crypto_km *ckm; + ngtcp2_crypto_cipher_ctx *hp_ctx; + ngtcp2_hp_mask hp_mask; + ngtcp2_decrypt decrypt; + ngtcp2_pktns *pktns; + int non_probing_pkt = 0; + int key_phase_bit_changed = 0; + int force_decrypt_failure = 0; + int recv_ncid = 0; + int new_cid_used = 0; + int path_challenge_recved = 0; + + if (conn->server && conn->local.transport_params.disable_active_migration && + !ngtcp2_path_eq(&conn->dcid.current.ps.path, path) && + !conn_allow_path_change_under_disable_active_migration(conn, path)) { + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "packet is discarded because active migration is disabled"); + + return NGTCP2_ERR_DISCARD_PKT; + } + + if (pkt[0] & NGTCP2_HEADER_FORM_BIT) { + nread = ngtcp2_pkt_decode_hd_long(&hd, pkt, pktlen); + if (nread < 0) { + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "could not decode long header"); + return NGTCP2_ERR_DISCARD_PKT; + } + + if (pktlen < (size_t)nread + hd.len) { + return NGTCP2_ERR_DISCARD_PKT; + } + + assert(conn->negotiated_version); + + if (hd.version != conn->client_chosen_version && + hd.version != conn->negotiated_version) { + return NGTCP2_ERR_DISCARD_PKT; + } + + pktlen = (size_t)nread + hd.len; + + /* Quoted from spec: if subsequent packets of those types include + a different Source Connection ID, they MUST be discarded. */ + if (!ngtcp2_cid_eq(&conn->dcid.current.cid, &hd.scid)) { + ngtcp2_log_rx_pkt_hd(&conn->log, &hd); + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "packet was ignored because of mismatched SCID"); + return NGTCP2_ERR_DISCARD_PKT; + } + + switch (hd.type) { + case NGTCP2_PKT_INITIAL: + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "delayed Initial packet was discarded"); + return (ngtcp2_ssize)pktlen; + case NGTCP2_PKT_HANDSHAKE: + if (hd.version != conn->negotiated_version) { + return NGTCP2_ERR_DISCARD_PKT; + } + + if (!conn->hs_pktns) { + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "delayed Handshake packet was discarded"); + return (ngtcp2_ssize)pktlen; + } + + pktns = conn->hs_pktns; + aead = &pktns->crypto.ctx.aead; + hp = &pktns->crypto.ctx.hp; + ckm = pktns->crypto.rx.ckm; + hp_ctx = &pktns->crypto.rx.hp_ctx; + hp_mask = conn->callbacks.hp_mask; + decrypt = conn->callbacks.decrypt; + break; + case NGTCP2_PKT_0RTT: + if (!conn->server || hd.version != conn->client_chosen_version) { + return NGTCP2_ERR_DISCARD_PKT; + } + + if (!conn->early.ckm) { + return (ngtcp2_ssize)pktlen; + } + + pktns = &conn->pktns; + aead = &conn->early.ctx.aead; + hp = &conn->early.ctx.hp; + ckm = conn->early.ckm; + hp_ctx = &conn->early.hp_ctx; + hp_mask = conn->callbacks.hp_mask; + decrypt = conn->callbacks.decrypt; + break; + default: + ngtcp2_log_rx_pkt_hd(&conn->log, &hd); + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "packet type 0x%02x was ignored", hd.type); + return (ngtcp2_ssize)pktlen; + } + } else { + nread = ngtcp2_pkt_decode_hd_short(&hd, pkt, pktlen, conn->oscid.datalen); + if (nread < 0) { + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "could not decode short header"); + return NGTCP2_ERR_DISCARD_PKT; + } + + pktns = &conn->pktns; + aead = &pktns->crypto.ctx.aead; + hp = &pktns->crypto.ctx.hp; + ckm = pktns->crypto.rx.ckm; + hp_ctx = &pktns->crypto.rx.hp_ctx; + hp_mask = conn->callbacks.hp_mask; + decrypt = conn->callbacks.decrypt; + } + + rv = conn_ensure_decrypt_hp_buffer(conn, (size_t)nread + 4); + if (rv != 0) { + return rv; + } + + nwrite = decrypt_hp(&hd, conn->crypto.decrypt_hp_buf.base, hp, pkt, pktlen, + (size_t)nread, hp_ctx, hp_mask); + if (nwrite < 0) { + if (ngtcp2_err_is_fatal((int)nwrite)) { + return nwrite; + } + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "could not decrypt packet number"); + return NGTCP2_ERR_DISCARD_PKT; + } + + hdpktlen = (size_t)nwrite; + payload = pkt + hdpktlen; + payloadlen = pktlen - hdpktlen; + + hd.pkt_num = ngtcp2_pkt_adjust_pkt_num(pktns->rx.max_pkt_num, hd.pkt_num, + pkt_num_bits(hd.pkt_numlen)); + if (hd.pkt_num > NGTCP2_MAX_PKT_NUM) { + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "pkn=%" PRId64 " is greater than maximum pkn", hd.pkt_num); + return NGTCP2_ERR_DISCARD_PKT; + } + + ngtcp2_log_rx_pkt_hd(&conn->log, &hd); + + if (hd.type == NGTCP2_PKT_1RTT) { + key_phase_bit_changed = conn_key_phase_changed(conn, &hd); + } + + rv = conn_ensure_decrypt_buffer(conn, payloadlen); + if (rv != 0) { + return rv; + } + + if (key_phase_bit_changed) { + assert(hd.type == NGTCP2_PKT_1RTT); + + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, "unexpected KEY_PHASE"); + + if (ckm->pkt_num > hd.pkt_num) { + if (conn->crypto.key_update.old_rx_ckm) { + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "decrypting with old key"); + ckm = conn->crypto.key_update.old_rx_ckm; + } else { + force_decrypt_failure = 1; + } + } else if (pktns->rx.max_pkt_num < hd.pkt_num) { + assert(ckm->pkt_num < hd.pkt_num); + if (!conn->crypto.key_update.new_rx_ckm) { + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "new key is not available"); + force_decrypt_failure = 1; + } else { + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "decrypting with new key"); + ckm = conn->crypto.key_update.new_rx_ckm; + } + } else { + force_decrypt_failure = 1; + } + } + + nwrite = decrypt_pkt(conn->crypto.decrypt_buf.base, aead, payload, payloadlen, + conn->crypto.decrypt_hp_buf.base, hdpktlen, hd.pkt_num, + ckm, decrypt); + + if (force_decrypt_failure) { + nwrite = NGTCP2_ERR_DECRYPT; + } + + if (nwrite < 0) { + if (ngtcp2_err_is_fatal((int)nwrite)) { + return nwrite; + } + + assert(NGTCP2_ERR_DECRYPT == nwrite); + + if (hd.type == NGTCP2_PKT_1RTT && + ++conn->crypto.decryption_failure_count >= + pktns->crypto.ctx.max_decryption_failure) { + return NGTCP2_ERR_AEAD_LIMIT_REACHED; + } + + if (hd.flags & NGTCP2_PKT_FLAG_LONG_FORM) { + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "could not decrypt packet payload"); + return NGTCP2_ERR_DISCARD_PKT; + } + + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "could not decrypt packet payload"); + return NGTCP2_ERR_DISCARD_PKT; + } + + rv = ngtcp2_pkt_verify_reserved_bits(conn->crypto.decrypt_hp_buf.base[0]); + if (rv != 0) { + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "packet has incorrect reserved bits"); + + return NGTCP2_ERR_PROTO; + } + + if (pktns_pkt_num_is_duplicate(pktns, hd.pkt_num)) { + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "packet was discarded because of duplicated packet number"); + return NGTCP2_ERR_DISCARD_PKT; + } + + payload = conn->crypto.decrypt_buf.base; + payloadlen = (size_t)nwrite; + + if (payloadlen == 0) { + /* QUIC packet must contain at least one frame */ + return NGTCP2_ERR_PROTO; + } + + if (hd.flags & NGTCP2_PKT_FLAG_LONG_FORM) { + switch (hd.type) { + case NGTCP2_PKT_HANDSHAKE: + rv = conn_verify_dcid(conn, NULL, &hd); + if (rv != 0) { + if (ngtcp2_err_is_fatal(rv)) { + return rv; + } + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "packet was ignored because of mismatched DCID"); + return NGTCP2_ERR_DISCARD_PKT; + } + + rv = conn_recv_delayed_handshake_pkt(conn, pi, &hd, pktlen, payload, + payloadlen, pkt_ts, ts); + if (rv < 0) { + return (ngtcp2_ssize)rv; + } + + return (ngtcp2_ssize)pktlen; + case NGTCP2_PKT_0RTT: + if (!ngtcp2_cid_eq(&conn->rcid, &hd.dcid)) { + rv = conn_verify_dcid(conn, NULL, &hd); + if (rv != 0) { + if (ngtcp2_err_is_fatal(rv)) { + return rv; + } + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "packet was ignored because of mismatched DCID"); + return NGTCP2_ERR_DISCARD_PKT; + } + } + break; + default: + /* Unreachable */ + ngtcp2_unreachable(); + } + } else { + rv = conn_verify_dcid(conn, &new_cid_used, &hd); + if (rv != 0) { + if (ngtcp2_err_is_fatal(rv)) { + return rv; + } + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "packet was ignored because of mismatched DCID"); + return NGTCP2_ERR_DISCARD_PKT; + } + } + + ngtcp2_qlog_pkt_received_start(&conn->qlog); + + for (; payloadlen;) { + nread = ngtcp2_pkt_decode_frame(fr, payload, payloadlen); + if (nread < 0) { + return nread; + } + + payload += nread; + payloadlen -= (size_t)nread; + + switch (fr->type) { + case NGTCP2_FRAME_ACK: + case NGTCP2_FRAME_ACK_ECN: + if ((hd.flags & NGTCP2_PKT_FLAG_LONG_FORM) && + hd.type == NGTCP2_PKT_0RTT) { + return NGTCP2_ERR_PROTO; + } + assert(conn->remote.transport_params); + assign_recved_ack_delay_unscaled( + &fr->ack, conn->remote.transport_params->ack_delay_exponent); + break; + } + + ngtcp2_log_rx_fr(&conn->log, &hd, fr); + + if (hd.type == NGTCP2_PKT_0RTT) { + switch (fr->type) { + case NGTCP2_FRAME_PADDING: + case NGTCP2_FRAME_PING: + case NGTCP2_FRAME_RESET_STREAM: + case NGTCP2_FRAME_STOP_SENDING: + case NGTCP2_FRAME_STREAM: + case NGTCP2_FRAME_MAX_DATA: + case NGTCP2_FRAME_MAX_STREAM_DATA: + case NGTCP2_FRAME_MAX_STREAMS_BIDI: + case NGTCP2_FRAME_MAX_STREAMS_UNI: + case NGTCP2_FRAME_DATA_BLOCKED: + case NGTCP2_FRAME_STREAM_DATA_BLOCKED: + case NGTCP2_FRAME_STREAMS_BLOCKED_BIDI: + case NGTCP2_FRAME_STREAMS_BLOCKED_UNI: + case NGTCP2_FRAME_NEW_CONNECTION_ID: + case NGTCP2_FRAME_PATH_CHALLENGE: + case NGTCP2_FRAME_CONNECTION_CLOSE: + case NGTCP2_FRAME_CONNECTION_CLOSE_APP: + case NGTCP2_FRAME_DATAGRAM: + case NGTCP2_FRAME_DATAGRAM_LEN: + break; + default: + return NGTCP2_ERR_PROTO; + } + } + + switch (fr->type) { + case NGTCP2_FRAME_ACK: + case NGTCP2_FRAME_ACK_ECN: + case NGTCP2_FRAME_PADDING: + case NGTCP2_FRAME_CONNECTION_CLOSE: + case NGTCP2_FRAME_CONNECTION_CLOSE_APP: + break; + default: + require_ack = 1; + } + + switch (fr->type) { + case NGTCP2_FRAME_ACK: + case NGTCP2_FRAME_ACK_ECN: + if (!conn->server) { + conn->flags |= NGTCP2_CONN_FLAG_SERVER_ADDR_VERIFIED; + } + rv = conn_recv_ack(conn, pktns, &fr->ack, pkt_ts, ts); + if (rv != 0) { + return rv; + } + non_probing_pkt = 1; + break; + case NGTCP2_FRAME_STREAM: + rv = conn_recv_stream(conn, &fr->stream); + if (rv != 0) { + return rv; + } + non_probing_pkt = 1; + break; + case NGTCP2_FRAME_CRYPTO: + rv = conn_recv_crypto(conn, NGTCP2_CRYPTO_LEVEL_APPLICATION, + &pktns->crypto.strm, &fr->crypto); + if (rv != 0) { + return rv; + } + non_probing_pkt = 1; + break; + case NGTCP2_FRAME_RESET_STREAM: + rv = conn_recv_reset_stream(conn, &fr->reset_stream); + if (rv != 0) { + return rv; + } + non_probing_pkt = 1; + break; + case NGTCP2_FRAME_STOP_SENDING: + rv = conn_recv_stop_sending(conn, &fr->stop_sending); + if (rv != 0) { + return rv; + } + non_probing_pkt = 1; + break; + case NGTCP2_FRAME_MAX_STREAM_DATA: + rv = conn_recv_max_stream_data(conn, &fr->max_stream_data); + if (rv != 0) { + return rv; + } + non_probing_pkt = 1; + break; + case NGTCP2_FRAME_MAX_DATA: + conn_recv_max_data(conn, &fr->max_data); + non_probing_pkt = 1; + break; + case NGTCP2_FRAME_MAX_STREAMS_BIDI: + case NGTCP2_FRAME_MAX_STREAMS_UNI: + rv = conn_recv_max_streams(conn, &fr->max_streams); + if (rv != 0) { + return rv; + } + non_probing_pkt = 1; + break; + case NGTCP2_FRAME_CONNECTION_CLOSE: + case NGTCP2_FRAME_CONNECTION_CLOSE_APP: + rv = conn_recv_connection_close(conn, &fr->connection_close); + if (rv != 0) { + return rv; + } + break; + case NGTCP2_FRAME_PING: + non_probing_pkt = 1; + break; + case NGTCP2_FRAME_PATH_CHALLENGE: + conn_recv_path_challenge(conn, path, &fr->path_challenge); + path_challenge_recved = 1; + break; + case NGTCP2_FRAME_PATH_RESPONSE: + rv = conn_recv_path_response(conn, &fr->path_response, ts); + if (rv != 0) { + return rv; + } + break; + case NGTCP2_FRAME_NEW_CONNECTION_ID: + rv = conn_recv_new_connection_id(conn, &fr->new_connection_id); + if (rv != 0) { + return rv; + } + recv_ncid = 1; + break; + case NGTCP2_FRAME_RETIRE_CONNECTION_ID: + rv = conn_recv_retire_connection_id(conn, &hd, &fr->retire_connection_id, + ts); + if (rv != 0) { + return rv; + } + non_probing_pkt = 1; + break; + case NGTCP2_FRAME_NEW_TOKEN: + rv = conn_recv_new_token(conn, &fr->new_token); + if (rv != 0) { + return rv; + } + non_probing_pkt = 1; + break; + case NGTCP2_FRAME_HANDSHAKE_DONE: + rv = conn_recv_handshake_done(conn, ts); + if (rv != 0) { + return rv; + } + non_probing_pkt = 1; + break; + case NGTCP2_FRAME_STREAMS_BLOCKED_BIDI: + rv = conn_recv_streams_blocked_bidi(conn, &fr->streams_blocked); + if (rv != 0) { + return rv; + } + non_probing_pkt = 1; + break; + case NGTCP2_FRAME_STREAMS_BLOCKED_UNI: + rv = conn_recv_streams_blocked_uni(conn, &fr->streams_blocked); + if (rv != 0) { + return rv; + } + non_probing_pkt = 1; + break; + case NGTCP2_FRAME_DATA_BLOCKED: + /* TODO Not implemented yet */ + non_probing_pkt = 1; + break; + case NGTCP2_FRAME_DATAGRAM: + case NGTCP2_FRAME_DATAGRAM_LEN: + if ((uint64_t)nread > + conn->local.transport_params.max_datagram_frame_size) { + return NGTCP2_ERR_PROTO; + } + rv = conn_recv_datagram(conn, &fr->datagram); + if (rv != 0) { + return rv; + } + non_probing_pkt = 1; + break; + } + + ngtcp2_qlog_write_frame(&conn->qlog, fr); + } + + ngtcp2_qlog_pkt_received_end(&conn->qlog, &hd, pktlen); + + if (recv_ncid) { + rv = conn_post_process_recv_new_connection_id(conn, ts); + if (rv != 0) { + return rv; + } + } + + if (conn->server && hd.type == NGTCP2_PKT_1RTT && + !ngtcp2_path_eq(&conn->dcid.current.ps.path, path)) { + if (non_probing_pkt && pktns->rx.max_pkt_num < hd.pkt_num && + !conn_path_validation_in_progress(conn, path)) { + rv = conn_recv_non_probing_pkt_on_new_path(conn, path, dgramlen, + new_cid_used, ts); + if (rv != 0) { + if (ngtcp2_err_is_fatal(rv)) { + return rv; + } + + /* DCID is not available. Just continue. */ + assert(NGTCP2_ERR_CONN_ID_BLOCKED == rv); + } + } else { + rv = conn_recv_pkt_from_new_path(conn, path, dgramlen, + path_challenge_recved, ts); + if (rv != 0) { + if (ngtcp2_err_is_fatal(rv)) { + return rv; + } + + /* DCID is not available. Just continue. */ + assert(NGTCP2_ERR_CONN_ID_BLOCKED == rv); + } + } + } + + if (hd.type == NGTCP2_PKT_1RTT) { + if (ckm == conn->crypto.key_update.new_rx_ckm) { + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, "rotate keys"); + conn_rotate_keys(conn, hd.pkt_num, /* initiator = */ 0); + } else if (ckm->pkt_num > hd.pkt_num) { + ckm->pkt_num = hd.pkt_num; + } + + if (conn->server && conn->early.ckm && + conn->early.discard_started_ts == UINT64_MAX) { + conn->early.discard_started_ts = ts; + } + + if (ngtcp2_path_eq(&conn->dcid.current.ps.path, path)) { + conn_update_keep_alive_last_ts(conn, ts); + } + } + + rv = pktns_commit_recv_pkt_num(pktns, hd.pkt_num, require_ack, pkt_ts); + if (rv != 0) { + return rv; + } + + pktns_increase_ecn_counts(pktns, pi); + + if (require_ack && + (++pktns->acktr.rx_npkt >= conn->local.settings.ack_thresh || + (pi->ecn & NGTCP2_ECN_MASK) == NGTCP2_ECN_CE)) { + ngtcp2_acktr_immediate_ack(&pktns->acktr); + } + + rv = ngtcp2_conn_sched_ack(conn, &pktns->acktr, hd.pkt_num, require_ack, + pkt_ts); + if (rv != 0) { + return rv; + } + + conn_restart_timer_on_read(conn, ts); + + ngtcp2_qlog_metrics_updated(&conn->qlog, &conn->cstat); + + return conn->state == NGTCP2_CS_DRAINING ? NGTCP2_ERR_DRAINING + : (ngtcp2_ssize)pktlen; +} + +/* + * conn_process_buffered_protected_pkt processes buffered 0RTT or 1RTT + * packets. + * + * This function returns 0 if it succeeds, or the same negative error + * codes from conn_recv_pkt. + */ +static int conn_process_buffered_protected_pkt(ngtcp2_conn *conn, + ngtcp2_pktns *pktns, + ngtcp2_tstamp ts) { + ngtcp2_ssize nread; + ngtcp2_pkt_chain **ppc, *next; + int rv; + + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, + "processing buffered protected packet"); + + for (ppc = &pktns->rx.buffed_pkts; *ppc;) { + next = (*ppc)->next; + nread = conn_recv_pkt(conn, &(*ppc)->path.path, &(*ppc)->pi, (*ppc)->pkt, + (*ppc)->pktlen, (*ppc)->dgramlen, (*ppc)->ts, ts); + if (nread < 0 && !ngtcp2_err_is_fatal((int)nread) && + nread != NGTCP2_ERR_DRAINING) { + /* TODO We don't know this is the first QUIC packet in a + datagram. */ + rv = conn_on_stateless_reset(conn, &(*ppc)->path.path, (*ppc)->pkt, + (*ppc)->pktlen); + if (rv == 0) { + ngtcp2_pkt_chain_del(*ppc, conn->mem); + *ppc = next; + return NGTCP2_ERR_DRAINING; + } + } + + ngtcp2_pkt_chain_del(*ppc, conn->mem); + *ppc = next; + if (nread < 0) { + if (nread == NGTCP2_ERR_DISCARD_PKT) { + continue; + } + return (int)nread; + } + } + + return 0; +} + +/* + * conn_process_buffered_handshake_pkt processes buffered Handshake + * packets. + * + * This function returns 0 if it succeeds, or the same negative error + * codes from conn_recv_handshake_pkt. + */ +static int conn_process_buffered_handshake_pkt(ngtcp2_conn *conn, + ngtcp2_tstamp ts) { + ngtcp2_pktns *pktns = conn->hs_pktns; + ngtcp2_ssize nread; + ngtcp2_pkt_chain **ppc, *next; + + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, + "processing buffered handshake packet"); + + for (ppc = &pktns->rx.buffed_pkts; *ppc;) { + next = (*ppc)->next; + nread = conn_recv_handshake_pkt(conn, &(*ppc)->path.path, &(*ppc)->pi, + (*ppc)->pkt, (*ppc)->pktlen, + (*ppc)->dgramlen, (*ppc)->ts, ts); + ngtcp2_pkt_chain_del(*ppc, conn->mem); + *ppc = next; + if (nread < 0) { + if (nread == NGTCP2_ERR_DISCARD_PKT) { + continue; + } + return (int)nread; + } + } + + return 0; +} + +static void conn_sync_stream_id_limit(ngtcp2_conn *conn) { + ngtcp2_transport_params *params = conn->remote.transport_params; + + assert(params); + + conn->local.bidi.max_streams = params->initial_max_streams_bidi; + conn->local.uni.max_streams = params->initial_max_streams_uni; +} + +static int strm_set_max_offset(void *data, void *ptr) { + ngtcp2_conn *conn = ptr; + ngtcp2_transport_params *params = conn->remote.transport_params; + ngtcp2_strm *strm = data; + uint64_t max_offset; + int rv; + + assert(params); + + if (!conn_local_stream(conn, strm->stream_id)) { + return 0; + } + + if (bidi_stream(strm->stream_id)) { + max_offset = params->initial_max_stream_data_bidi_remote; + } else { + max_offset = params->initial_max_stream_data_uni; + } + + if (strm->tx.max_offset < max_offset) { + strm->tx.max_offset = max_offset; + + /* Don't call callback if stream is half-closed local */ + if (strm->flags & NGTCP2_STRM_FLAG_SHUT_WR) { + return 0; + } + + rv = conn_call_extend_max_stream_data(conn, strm, strm->stream_id, + strm->tx.max_offset); + if (rv != 0) { + return rv; + } + } + + return 0; +} + +static int conn_sync_stream_data_limit(ngtcp2_conn *conn) { + return ngtcp2_map_each(&conn->strms, strm_set_max_offset, conn); +} + +/* + * conn_handshake_completed is called once cryptographic handshake has + * completed. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_CALLBACK_FAILURE + * User callback failed. + */ +static int conn_handshake_completed(ngtcp2_conn *conn) { + int rv; + + conn->flags |= NGTCP2_CONN_FLAG_HANDSHAKE_COMPLETED_HANDLED; + + rv = conn_call_handshake_completed(conn); + if (rv != 0) { + return rv; + } + + if (conn->local.bidi.max_streams > 0) { + rv = conn_call_extend_max_local_streams_bidi(conn, + conn->local.bidi.max_streams); + if (rv != 0) { + return rv; + } + } + if (conn->local.uni.max_streams > 0) { + rv = conn_call_extend_max_local_streams_uni(conn, + conn->local.uni.max_streams); + if (rv != 0) { + return rv; + } + } + + return 0; +} + +/* + * conn_recv_cpkt processes compound packet after handshake. The + * buffer pointed by |pkt| might contain multiple packets. The 1RTT + * packet must be the last one because it does not have payload length + * field. + * + * This function returns 0 if it succeeds, or the same negative error + * codes from conn_recv_pkt except for NGTCP2_ERR_DISCARD_PKT. + */ +static int conn_recv_cpkt(ngtcp2_conn *conn, const ngtcp2_path *path, + const ngtcp2_pkt_info *pi, const uint8_t *pkt, + size_t pktlen, ngtcp2_tstamp ts) { + ngtcp2_ssize nread; + int rv; + const uint8_t *origpkt = pkt; + size_t dgramlen = pktlen; + + if (ngtcp2_path_eq(&conn->dcid.current.ps.path, path)) { + conn->dcid.current.bytes_recv += dgramlen; + } + + while (pktlen) { + nread = conn_recv_pkt(conn, path, pi, pkt, pktlen, dgramlen, ts, ts); + if (nread < 0) { + if (ngtcp2_err_is_fatal((int)nread)) { + return (int)nread; + } + + if (nread == NGTCP2_ERR_DRAINING) { + return NGTCP2_ERR_DRAINING; + } + + if (origpkt == pkt) { + rv = conn_on_stateless_reset(conn, path, origpkt, dgramlen); + if (rv == 0) { + return NGTCP2_ERR_DRAINING; + } + } + if (nread == NGTCP2_ERR_DISCARD_PKT) { + return 0; + } + return (int)nread; + } + + assert(pktlen >= (size_t)nread); + pkt += nread; + pktlen -= (size_t)nread; + + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, + "read packet %td left %zu", nread, pktlen); + } + + return 0; +} + +/* + * conn_is_retired_path returns nonzero if |path| is included in + * retired path list. + */ +static int conn_is_retired_path(ngtcp2_conn *conn, const ngtcp2_path *path) { + size_t i, len = ngtcp2_ringbuf_len(&conn->dcid.retired.rb); + ngtcp2_dcid *dcid; + + for (i = 0; i < len; ++i) { + dcid = ngtcp2_ringbuf_get(&conn->dcid.retired.rb, i); + if (ngtcp2_path_eq(&dcid->ps.path, path)) { + return 1; + } + } + + return 0; +} + +/* + * conn_enqueue_handshake_done enqueues HANDSHAKE_DONE frame for + * transmission. + */ +static int conn_enqueue_handshake_done(ngtcp2_conn *conn) { + ngtcp2_pktns *pktns = &conn->pktns; + ngtcp2_frame_chain *nfrc; + int rv; + + assert(conn->server); + + rv = ngtcp2_frame_chain_objalloc_new(&nfrc, &conn->frc_objalloc); + if (rv != 0) { + return rv; + } + + nfrc->fr.type = NGTCP2_FRAME_HANDSHAKE_DONE; + nfrc->next = pktns->tx.frq; + pktns->tx.frq = nfrc; + + return 0; +} + +/** + * @function + * + * `conn_read_handshake` performs QUIC cryptographic handshake by + * reading given data. |pkt| points to the buffer to read and + * |pktlen| is the length of the buffer. |path| is the network path. + * + * This function returns the number of bytes processed. Unless the + * last packet is 1RTT packet and an application decryption key has + * been installed, it returns |pktlen| if it succeeds. If it finds + * 1RTT packet and an application decryption key has been installed, + * it returns the number of bytes just before 1RTT packet begins. + * + * This function returns the number of bytes processed if it succeeds, + * or one of the following negative error codes: (TBD). + */ +static ngtcp2_ssize conn_read_handshake(ngtcp2_conn *conn, + const ngtcp2_path *path, + const ngtcp2_pkt_info *pi, + const uint8_t *pkt, size_t pktlen, + ngtcp2_tstamp ts) { + int rv; + ngtcp2_ssize nread; + + switch (conn->state) { + case NGTCP2_CS_CLIENT_INITIAL: + /* TODO Better to log something when we ignore input */ + return (ngtcp2_ssize)pktlen; + case NGTCP2_CS_CLIENT_WAIT_HANDSHAKE: + nread = conn_recv_handshake_cpkt(conn, path, pi, pkt, pktlen, ts); + if (nread < 0) { + return nread; + } + + if (conn->state == NGTCP2_CS_CLIENT_INITIAL) { + /* Retry packet was received */ + return (ngtcp2_ssize)pktlen; + } + + assert(conn->hs_pktns); + + if (conn->hs_pktns->crypto.rx.ckm && conn->in_pktns) { + rv = conn_process_buffered_handshake_pkt(conn, ts); + if (rv != 0) { + return rv; + } + } + + if (conn_is_handshake_completed(conn) && + !(conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_COMPLETED_HANDLED)) { + rv = conn_handshake_completed(conn); + if (rv != 0) { + return rv; + } + + rv = conn_process_buffered_protected_pkt(conn, &conn->pktns, ts); + if (rv != 0) { + return rv; + } + } + + return nread; + case NGTCP2_CS_SERVER_INITIAL: + nread = conn_recv_handshake_cpkt(conn, path, pi, pkt, pktlen, ts); + if (nread < 0) { + return nread; + } + + /* + * Client ServerHello might not fit into single Initial packet + * (e.g., resuming session with client authentication). If we get + * Client Initial which does not increase offset or it is 0RTT + * packet buffered, perform address validation in order to buffer + * validated data only. + */ + if (ngtcp2_strm_rx_offset(&conn->in_pktns->crypto.strm) == 0) { + if (conn->in_pktns->crypto.strm.rx.rob && + ngtcp2_rob_data_buffered(conn->in_pktns->crypto.strm.rx.rob)) { + /* Address has been validated with token */ + if (conn->local.settings.tokenlen) { + return nread; + } + return NGTCP2_ERR_RETRY; + } + if (conn->in_pktns->rx.buffed_pkts) { + /* 0RTT is buffered, force retry */ + return NGTCP2_ERR_RETRY; + } + /* If neither CRYPTO frame nor 0RTT packet is processed, just + drop connection. */ + return NGTCP2_ERR_DROP_CONN; + } + + /* Process re-ordered 0-RTT packets which arrived before Initial + packet. */ + if (conn->early.ckm) { + assert(conn->in_pktns); + + rv = conn_process_buffered_protected_pkt(conn, conn->in_pktns, ts); + if (rv != 0) { + return rv; + } + } + + return nread; + case NGTCP2_CS_SERVER_WAIT_HANDSHAKE: + nread = conn_recv_handshake_cpkt(conn, path, pi, pkt, pktlen, ts); + if (nread < 0) { + return nread; + } + + if (conn->hs_pktns->crypto.rx.ckm) { + rv = conn_process_buffered_handshake_pkt(conn, ts); + if (rv != 0) { + return rv; + } + } + + if (conn->hs_pktns->rx.max_pkt_num != -1) { + conn_discard_initial_state(conn, ts); + } + + if (!conn_is_handshake_completed(conn)) { + /* If server hits amplification limit, it cancels loss detection + timer. If server receives a packet from client, the limit is + increased and server can send more. If server has + ack-eliciting Initial or Handshake packets, it should resend + it if timer fired but timer is not armed in this case. So + instead of resending Initial/Handshake packets, if server has + 1RTT data to send, it might send them and then might hit + amplification limit again until it hits stream data limit. + Initial/Handshake data is not resent. In order to avoid this + situation, try to arm loss detection and check the expiry + here so that on next write call, we can resend + Initial/Handshake first. */ + if (conn->cstat.loss_detection_timer == UINT64_MAX) { + ngtcp2_conn_set_loss_detection_timer(conn, ts); + if (ngtcp2_conn_loss_detection_expiry(conn) <= ts) { + rv = ngtcp2_conn_on_loss_detection_timer(conn, ts); + if (rv != 0) { + return rv; + } + } + } + + if ((size_t)nread < pktlen) { + /* We have 1RTT packet and application rx key, but the + handshake has not completed yet. */ + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, + "buffering 1RTT packet len=%zu", + pktlen - (size_t)nread); + + rv = conn_buffer_pkt(conn, &conn->pktns, path, pi, pkt + nread, + pktlen - (size_t)nread, pktlen, ts); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + return rv; + } + + return (ngtcp2_ssize)pktlen; + } + + return nread; + } + + if (!(conn->flags & NGTCP2_CONN_FLAG_TRANSPORT_PARAM_RECVED)) { + return NGTCP2_ERR_REQUIRED_TRANSPORT_PARAM; + } + + rv = conn_handshake_completed(conn); + if (rv != 0) { + return rv; + } + conn->state = NGTCP2_CS_POST_HANDSHAKE; + + rv = conn_call_activate_dcid(conn, &conn->dcid.current); + if (rv != 0) { + return rv; + } + + rv = conn_process_buffered_protected_pkt(conn, &conn->pktns, ts); + if (rv != 0) { + return rv; + } + + conn_discard_handshake_state(conn, ts); + + rv = conn_enqueue_handshake_done(conn); + if (rv != 0) { + return rv; + } + + if (!conn->local.settings.no_pmtud) { + rv = conn_start_pmtud(conn); + if (rv != 0) { + return rv; + } + } + + conn->pktns.rtb.persistent_congestion_start_ts = ts; + + /* Re-arm loss detection timer here after handshake has been + confirmed. */ + ngtcp2_conn_set_loss_detection_timer(conn, ts); + + return nread; + case NGTCP2_CS_CLOSING: + return NGTCP2_ERR_CLOSING; + case NGTCP2_CS_DRAINING: + return NGTCP2_ERR_DRAINING; + default: + return (ngtcp2_ssize)pktlen; + } +} + +int ngtcp2_conn_read_pkt_versioned(ngtcp2_conn *conn, const ngtcp2_path *path, + int pkt_info_version, + const ngtcp2_pkt_info *pi, + const uint8_t *pkt, size_t pktlen, + ngtcp2_tstamp ts) { + int rv = 0; + ngtcp2_ssize nread = 0; + const ngtcp2_pkt_info zero_pi = {0}; + (void)pkt_info_version; + + assert(!(conn->flags & NGTCP2_CONN_FLAG_PPE_PENDING)); + + conn->log.last_ts = ts; + conn->qlog.last_ts = ts; + + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, "recv packet len=%zu", + pktlen); + + if (pktlen == 0) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + /* client does not expect a packet from unknown path. */ + if (!conn->server && !ngtcp2_path_eq(&conn->dcid.current.ps.path, path) && + (!conn->pv || !ngtcp2_path_eq(&conn->pv->dcid.ps.path, path)) && + !conn_is_retired_path(conn, path)) { + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, + "ignore packet from unknown path"); + return 0; + } + + if (!pi) { + pi = &zero_pi; + } + + switch (conn->state) { + case NGTCP2_CS_CLIENT_INITIAL: + case NGTCP2_CS_CLIENT_WAIT_HANDSHAKE: + case NGTCP2_CS_CLIENT_TLS_HANDSHAKE_FAILED: + nread = conn_read_handshake(conn, path, pi, pkt, pktlen, ts); + if (nread < 0) { + return (int)nread; + } + + if ((size_t)nread == pktlen) { + return 0; + } + + assert(conn->pktns.crypto.rx.ckm); + + pkt += nread; + pktlen -= (size_t)nread; + + break; + case NGTCP2_CS_SERVER_INITIAL: + case NGTCP2_CS_SERVER_WAIT_HANDSHAKE: + case NGTCP2_CS_SERVER_TLS_HANDSHAKE_FAILED: + if (!ngtcp2_path_eq(&conn->dcid.current.ps.path, path)) { + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, + "ignore packet from unknown path during handshake"); + + if (conn->state == NGTCP2_CS_SERVER_INITIAL && + ngtcp2_strm_rx_offset(&conn->in_pktns->crypto.strm) == 0 && + (!conn->in_pktns->crypto.strm.rx.rob || + !ngtcp2_rob_data_buffered(conn->in_pktns->crypto.strm.rx.rob))) { + return NGTCP2_ERR_DROP_CONN; + } + + return 0; + } + + nread = conn_read_handshake(conn, path, pi, pkt, pktlen, ts); + if (nread < 0) { + return (int)nread; + } + + if ((size_t)nread == pktlen) { + return 0; + } + + assert(conn->pktns.crypto.rx.ckm); + + pkt += nread; + pktlen -= (size_t)nread; + + break; + case NGTCP2_CS_CLOSING: + return NGTCP2_ERR_CLOSING; + case NGTCP2_CS_DRAINING: + return NGTCP2_ERR_DRAINING; + case NGTCP2_CS_POST_HANDSHAKE: + rv = conn_prepare_key_update(conn, ts); + if (rv != 0) { + return rv; + } + break; + default: + ngtcp2_unreachable(); + } + + return conn_recv_cpkt(conn, path, pi, pkt, pktlen, ts); +} + +/* + * conn_check_pkt_num_exhausted returns nonzero if packet number is + * exhausted in at least one of packet number space. + */ +static int conn_check_pkt_num_exhausted(ngtcp2_conn *conn) { + ngtcp2_pktns *in_pktns = conn->in_pktns; + ngtcp2_pktns *hs_pktns = conn->hs_pktns; + + return (in_pktns && in_pktns->tx.last_pkt_num == NGTCP2_MAX_PKT_NUM) || + (hs_pktns && hs_pktns->tx.last_pkt_num == NGTCP2_MAX_PKT_NUM) || + conn->pktns.tx.last_pkt_num == NGTCP2_MAX_PKT_NUM; +} + +/* + * conn_retransmit_retry_early retransmits 0RTT packet after Retry is + * received from server. + */ +static ngtcp2_ssize conn_retransmit_retry_early(ngtcp2_conn *conn, + ngtcp2_pkt_info *pi, + uint8_t *dest, size_t destlen, + uint8_t flags, + ngtcp2_tstamp ts) { + return conn_write_pkt(conn, pi, dest, destlen, NULL, NGTCP2_PKT_0RTT, flags, + ts); +} + +/* + * conn_handshake_probe_left returns nonzero if there are probe + * packets to be sent for Initial or Handshake packet number space + * left. + */ +static int conn_handshake_probe_left(ngtcp2_conn *conn) { + return (conn->in_pktns && conn->in_pktns->rtb.probe_pkt_left) || + conn->hs_pktns->rtb.probe_pkt_left; +} + +/* + * conn_validate_early_transport_params_limits validates that the + * limits in transport parameters remembered by client for early data + * are not reduced. This function is only used by client and should + * only be called when early data is accepted by server. + */ +static int conn_validate_early_transport_params_limits(ngtcp2_conn *conn) { + const ngtcp2_transport_params *params = conn->remote.transport_params; + + assert(!conn->server); + assert(params); + + if (conn->early.transport_params.active_connection_id_limit > + params->active_connection_id_limit || + conn->early.transport_params.initial_max_data > + params->initial_max_data || + conn->early.transport_params.initial_max_stream_data_bidi_local > + params->initial_max_stream_data_bidi_local || + conn->early.transport_params.initial_max_stream_data_bidi_remote > + params->initial_max_stream_data_bidi_remote || + conn->early.transport_params.initial_max_stream_data_uni > + params->initial_max_stream_data_uni || + conn->early.transport_params.initial_max_streams_bidi > + params->initial_max_streams_bidi || + conn->early.transport_params.initial_max_streams_uni > + params->initial_max_streams_uni || + conn->early.transport_params.max_datagram_frame_size > + params->max_datagram_frame_size) { + return NGTCP2_ERR_PROTO; + } + + return 0; +} + +/* + * conn_write_handshake writes QUIC handshake packets to the buffer + * pointed by |dest| of length |destlen|. |write_datalen| specifies + * the expected length of 0RTT or 1RTT packet payload. Specify 0 to + * |write_datalen| if there is no such data. + * + * This function returns the number of bytes written to the buffer, or + * one of the following negative error codes: + * + * NGTCP2_ERR_PKT_NUM_EXHAUSTED + * Packet number is exhausted. + * NGTCP2_ERR_NOMEM + * Out of memory + * NGTCP2_ERR_REQUIRED_TRANSPORT_PARAM + * Required transport parameter is missing. + * NGTCP2_CS_CLOSING + * Connection is in closing state. + * NGTCP2_CS_DRAINING + * Connection is in draining state. + * + * In addition to the above negative error codes, the same error codes + * from conn_recv_pkt may also be returned. + */ +static ngtcp2_ssize conn_write_handshake(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, + uint8_t *dest, size_t destlen, + uint64_t write_datalen, + ngtcp2_tstamp ts) { + int rv; + ngtcp2_ssize res = 0, nwrite = 0, early_spktlen = 0; + size_t origlen = destlen; + uint64_t pending_early_datalen; + ngtcp2_dcid *dcid; + ngtcp2_preferred_addr *paddr; + + switch (conn->state) { + case NGTCP2_CS_CLIENT_INITIAL: + pending_early_datalen = conn_retry_early_payloadlen(conn); + if (pending_early_datalen) { + write_datalen = pending_early_datalen; + } + + if (!(conn->flags & NGTCP2_CONN_FLAG_RECV_RETRY)) { + nwrite = + conn_write_client_initial(conn, pi, dest, destlen, write_datalen, ts); + if (nwrite <= 0) { + return nwrite; + } + } else { + nwrite = conn_write_handshake_pkt( + conn, pi, dest, destlen, NGTCP2_PKT_INITIAL, + NGTCP2_WRITE_PKT_FLAG_NONE, write_datalen, ts); + if (nwrite < 0) { + return nwrite; + } + } + + if (pending_early_datalen) { + early_spktlen = conn_retransmit_retry_early( + conn, pi, dest + nwrite, destlen - (size_t)nwrite, + nwrite ? NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING + : NGTCP2_WRITE_PKT_FLAG_NONE, + ts); + + if (early_spktlen < 0) { + assert(ngtcp2_err_is_fatal((int)early_spktlen)); + return early_spktlen; + } + } + + conn->state = NGTCP2_CS_CLIENT_WAIT_HANDSHAKE; + + res = nwrite + early_spktlen; + + return res; + case NGTCP2_CS_CLIENT_WAIT_HANDSHAKE: + if (!conn_handshake_probe_left(conn) && conn_cwnd_is_zero(conn)) { + destlen = 0; + } else { + if (!(conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_COMPLETED_HANDLED)) { + pending_early_datalen = conn_retry_early_payloadlen(conn); + if (pending_early_datalen) { + write_datalen = pending_early_datalen; + } + } + + nwrite = + conn_write_handshake_pkts(conn, pi, dest, destlen, write_datalen, ts); + if (nwrite < 0) { + return nwrite; + } + + res += nwrite; + dest += nwrite; + destlen -= (size_t)nwrite; + } + + if (!conn_is_handshake_completed(conn)) { + if (!(conn->flags & NGTCP2_CONN_FLAG_EARLY_DATA_REJECTED)) { + nwrite = conn_retransmit_retry_early(conn, pi, dest, destlen, + NGTCP2_WRITE_PKT_FLAG_NONE, ts); + if (nwrite < 0) { + return nwrite; + } + + res += nwrite; + } + + if (res == 0) { + nwrite = conn_write_handshake_ack_pkts(conn, pi, dest, origlen, ts); + if (nwrite < 0) { + return nwrite; + } + res = nwrite; + } + + return res; + } + + if (!(conn->flags & NGTCP2_CONN_FLAG_TRANSPORT_PARAM_RECVED)) { + return NGTCP2_ERR_REQUIRED_TRANSPORT_PARAM; + } + + if ((conn->flags & NGTCP2_CONN_FLAG_EARLY_KEY_INSTALLED) && + !(conn->flags & NGTCP2_CONN_FLAG_EARLY_DATA_REJECTED)) { + rv = conn_validate_early_transport_params_limits(conn); + if (rv != 0) { + return rv; + } + } + + /* Server might increase stream data limits. Extend it if we have + streams created for early data. */ + rv = conn_sync_stream_data_limit(conn); + if (rv != 0) { + return rv; + } + + conn->state = NGTCP2_CS_POST_HANDSHAKE; + + assert(conn->remote.transport_params); + + if (conn->remote.transport_params->preferred_address_present) { + assert(!ngtcp2_ringbuf_full(&conn->dcid.unused.rb)); + + paddr = &conn->remote.transport_params->preferred_address; + dcid = ngtcp2_ringbuf_push_back(&conn->dcid.unused.rb); + ngtcp2_dcid_init(dcid, 1, &paddr->cid, paddr->stateless_reset_token); + + rv = ngtcp2_gaptr_push(&conn->dcid.seqgap, 1, 1); + if (rv != 0) { + return (ngtcp2_ssize)rv; + } + } + + if (conn->remote.transport_params->stateless_reset_token_present) { + assert(conn->dcid.current.seq == 0); + assert(!(conn->dcid.current.flags & NGTCP2_DCID_FLAG_TOKEN_PRESENT)); + ngtcp2_dcid_set_token( + &conn->dcid.current, + conn->remote.transport_params->stateless_reset_token); + } + + rv = conn_call_activate_dcid(conn, &conn->dcid.current); + if (rv != 0) { + return rv; + } + + conn_process_early_rtb(conn); + + if (!conn->local.settings.no_pmtud) { + rv = conn_start_pmtud(conn); + if (rv != 0) { + return rv; + } + } + + return res; + case NGTCP2_CS_SERVER_INITIAL: + nwrite = + conn_write_handshake_pkts(conn, pi, dest, destlen, write_datalen, ts); + if (nwrite < 0) { + return nwrite; + } + + if (nwrite) { + conn->state = NGTCP2_CS_SERVER_WAIT_HANDSHAKE; + } + + return nwrite; + case NGTCP2_CS_SERVER_WAIT_HANDSHAKE: + if (conn_handshake_probe_left(conn) || !conn_cwnd_is_zero(conn)) { + nwrite = + conn_write_handshake_pkts(conn, pi, dest, destlen, write_datalen, ts); + if (nwrite < 0) { + return nwrite; + } + + res += nwrite; + dest += nwrite; + destlen -= (size_t)nwrite; + } + + if (res == 0) { + nwrite = conn_write_handshake_ack_pkts(conn, pi, dest, origlen, ts); + if (nwrite < 0) { + return nwrite; + } + + res += nwrite; + dest += nwrite; + origlen -= (size_t)nwrite; + } + + return res; + case NGTCP2_CS_CLOSING: + return NGTCP2_ERR_CLOSING; + case NGTCP2_CS_DRAINING: + return NGTCP2_ERR_DRAINING; + default: + return 0; + } +} + +/** + * @function + * + * `conn_client_write_handshake` writes client side handshake data and + * 0RTT packet. + * + * In order to send STREAM data in 0RTT packet, specify + * |vmsg|->stream. |vmsg|->stream.strm, |vmsg|->stream.fin, + * |vmsg|->stream.data, and |vmsg|->stream.datacnt are stream to which + * 0-RTT data is sent, whether it is a last data chunk in this stream, + * a vector of 0-RTT data, and its number of elements respectively. + * The amount of 0RTT data sent is assigned to + * *|vmsg|->stream.pdatalen. If no data is sent, -1 is assigned. + * Note that 0 length STREAM frame is allowed in QUIC, so 0 might be + * assigned to *|vmsg|->stream.pdatalen. + * + * This function returns 0 if it cannot write any frame because buffer + * is too small, or packet is congestion limited. Application should + * keep reading and wait for congestion window to grow. + * + * This function returns the number of bytes written to the buffer + * pointed by |dest| if it succeeds, or one of the following negative + * error codes: (TBD). + */ +static ngtcp2_ssize conn_client_write_handshake(ngtcp2_conn *conn, + ngtcp2_pkt_info *pi, + uint8_t *dest, size_t destlen, + ngtcp2_vmsg *vmsg, + ngtcp2_tstamp ts) { + int send_stream = 0; + int send_datagram = 0; + ngtcp2_ssize spktlen, early_spktlen; + uint64_t datalen; + uint64_t write_datalen = 0; + uint8_t wflags = NGTCP2_WRITE_PKT_FLAG_NONE; + int ppe_pending = (conn->flags & NGTCP2_CONN_FLAG_PPE_PENDING) != 0; + uint32_t version; + + assert(!conn->server); + + /* conn->early.ckm might be created in the first call of + conn_handshake(). Check it later. */ + if (vmsg) { + switch (vmsg->type) { + case NGTCP2_VMSG_TYPE_STREAM: + datalen = ngtcp2_vec_len(vmsg->stream.data, vmsg->stream.datacnt); + send_stream = + conn_retry_early_payloadlen(conn) == 0 && + /* 0 length STREAM frame is allowed */ + (datalen == 0 || + (datalen > 0 && + (vmsg->stream.strm->tx.max_offset - vmsg->stream.strm->tx.offset) && + (conn->tx.max_offset - conn->tx.offset))); + if (send_stream) { + write_datalen = + conn_enforce_flow_control(conn, vmsg->stream.strm, datalen); + write_datalen = + ngtcp2_min(write_datalen, NGTCP2_MIN_COALESCED_PAYLOADLEN); + write_datalen += NGTCP2_STREAM_OVERHEAD; + + if (vmsg->stream.flags & NGTCP2_WRITE_STREAM_FLAG_MORE) { + wflags |= NGTCP2_WRITE_PKT_FLAG_MORE; + } + } else { + vmsg = NULL; + } + break; + case NGTCP2_VMSG_TYPE_DATAGRAM: + datalen = ngtcp2_vec_len(vmsg->datagram.data, vmsg->datagram.datacnt); + send_datagram = conn_retry_early_payloadlen(conn) == 0; + if (send_datagram) { + write_datalen = datalen + NGTCP2_DATAGRAM_OVERHEAD; + + if (vmsg->datagram.flags & NGTCP2_WRITE_DATAGRAM_FLAG_MORE) { + wflags |= NGTCP2_WRITE_PKT_FLAG_MORE; + } + } else { + vmsg = NULL; + } + break; + } + } + + if (!ppe_pending) { + spktlen = conn_write_handshake(conn, pi, dest, destlen, write_datalen, ts); + + if (spktlen < 0) { + return spktlen; + } + + if ((conn->flags & NGTCP2_CONN_FLAG_EARLY_DATA_REJECTED) || + !conn->early.ckm || (!send_stream && !send_datagram)) { + return spktlen; + } + + /* If spktlen > 0, we are making a compound packet. If Initial + packet is written, we have to pad bytes to 0-RTT packet. */ + version = conn->negotiated_version ? conn->negotiated_version + : conn->client_chosen_version; + if (spktlen > 0 && + ngtcp2_pkt_get_type_long(version, dest[0]) == NGTCP2_PKT_INITIAL) { + wflags |= NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING; + conn->pkt.require_padding = 1; + } else { + conn->pkt.require_padding = 0; + } + } else { + assert(!conn->pktns.crypto.rx.ckm); + assert(!conn->pktns.crypto.tx.ckm); + assert(conn->early.ckm); + + if (conn->pkt.require_padding) { + wflags |= NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING; + } + spktlen = conn->pkt.hs_spktlen; + } + + dest += spktlen; + destlen -= (size_t)spktlen; + + if (conn_cwnd_is_zero(conn)) { + return spktlen; + } + + early_spktlen = conn_write_pkt(conn, pi, dest, destlen, vmsg, NGTCP2_PKT_0RTT, + wflags, ts); + + if (early_spktlen < 0) { + switch (early_spktlen) { + case NGTCP2_ERR_STREAM_DATA_BLOCKED: + return spktlen; + case NGTCP2_ERR_WRITE_MORE: + conn->pkt.hs_spktlen = spktlen; + break; + } + return early_spktlen; + } + + return spktlen + early_spktlen; +} + +void ngtcp2_conn_handshake_completed(ngtcp2_conn *conn) { + conn->flags |= NGTCP2_CONN_FLAG_HANDSHAKE_COMPLETED; + if (conn->server) { + conn->flags |= NGTCP2_CONN_FLAG_HANDSHAKE_CONFIRMED; + } +} + +int ngtcp2_conn_get_handshake_completed(ngtcp2_conn *conn) { + return conn_is_handshake_completed(conn) && + (conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_COMPLETED_HANDLED); +} + +int ngtcp2_conn_sched_ack(ngtcp2_conn *conn, ngtcp2_acktr *acktr, + int64_t pkt_num, int active_ack, ngtcp2_tstamp ts) { + int rv; + (void)conn; + + rv = ngtcp2_acktr_add(acktr, pkt_num, active_ack, ts); + if (rv != 0) { + assert(rv != NGTCP2_ERR_INVALID_ARGUMENT); + return rv; + } + + return 0; +} + +int ngtcp2_accept(ngtcp2_pkt_hd *dest, const uint8_t *pkt, size_t pktlen) { + ngtcp2_ssize nread; + ngtcp2_pkt_hd hd, *p; + + if (dest) { + p = dest; + } else { + p = &hd; + } + + if (pktlen == 0 || (pkt[0] & NGTCP2_HEADER_FORM_BIT) == 0) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + nread = ngtcp2_pkt_decode_hd_long(p, pkt, pktlen); + if (nread < 0) { + return (int)nread; + } + + switch (p->type) { + case NGTCP2_PKT_INITIAL: + break; + case NGTCP2_PKT_0RTT: + /* 0-RTT packet may arrive before Initial packet due to + re-ordering. ngtcp2 does not buffer 0RTT packet unless the + very first Initial packet is received or token is received. */ + return NGTCP2_ERR_RETRY; + default: + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + if (pktlen < NGTCP2_MAX_UDP_PAYLOAD_SIZE || + (p->tokenlen == 0 && p->dcid.datalen < NGTCP2_MIN_INITIAL_DCIDLEN)) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + return 0; +} + +int ngtcp2_conn_install_initial_key( + ngtcp2_conn *conn, const ngtcp2_crypto_aead_ctx *rx_aead_ctx, + const uint8_t *rx_iv, const ngtcp2_crypto_cipher_ctx *rx_hp_ctx, + const ngtcp2_crypto_aead_ctx *tx_aead_ctx, const uint8_t *tx_iv, + const ngtcp2_crypto_cipher_ctx *tx_hp_ctx, size_t ivlen) { + ngtcp2_pktns *pktns = conn->in_pktns; + int rv; + + assert(ivlen >= 8); + assert(pktns); + + conn_call_delete_crypto_cipher_ctx(conn, &pktns->crypto.rx.hp_ctx); + pktns->crypto.rx.hp_ctx.native_handle = NULL; + + if (pktns->crypto.rx.ckm) { + conn_call_delete_crypto_aead_ctx(conn, &pktns->crypto.rx.ckm->aead_ctx); + ngtcp2_crypto_km_del(pktns->crypto.rx.ckm, conn->mem); + pktns->crypto.rx.ckm = NULL; + } + + conn_call_delete_crypto_cipher_ctx(conn, &pktns->crypto.tx.hp_ctx); + pktns->crypto.tx.hp_ctx.native_handle = NULL; + + if (pktns->crypto.tx.ckm) { + conn_call_delete_crypto_aead_ctx(conn, &pktns->crypto.tx.ckm->aead_ctx); + ngtcp2_crypto_km_del(pktns->crypto.tx.ckm, conn->mem); + pktns->crypto.tx.ckm = NULL; + } + + rv = ngtcp2_crypto_km_new(&pktns->crypto.rx.ckm, NULL, 0, NULL, rx_iv, ivlen, + conn->mem); + if (rv != 0) { + return rv; + } + + rv = ngtcp2_crypto_km_new(&pktns->crypto.tx.ckm, NULL, 0, NULL, tx_iv, ivlen, + conn->mem); + if (rv != 0) { + return rv; + } + + /* Take owner ship after we are sure that no failure occurs, so that + caller can delete these contexts on failure. */ + pktns->crypto.rx.ckm->aead_ctx = *rx_aead_ctx; + pktns->crypto.rx.hp_ctx = *rx_hp_ctx; + pktns->crypto.tx.ckm->aead_ctx = *tx_aead_ctx; + pktns->crypto.tx.hp_ctx = *tx_hp_ctx; + + return 0; +} + +int ngtcp2_conn_install_vneg_initial_key( + ngtcp2_conn *conn, uint32_t version, + const ngtcp2_crypto_aead_ctx *rx_aead_ctx, const uint8_t *rx_iv, + const ngtcp2_crypto_cipher_ctx *rx_hp_ctx, + const ngtcp2_crypto_aead_ctx *tx_aead_ctx, const uint8_t *tx_iv, + const ngtcp2_crypto_cipher_ctx *tx_hp_ctx, size_t ivlen) { + int rv; + + assert(ivlen >= 8); + + conn_call_delete_crypto_cipher_ctx(conn, &conn->vneg.rx.hp_ctx); + conn->vneg.rx.hp_ctx.native_handle = NULL; + + if (conn->vneg.rx.ckm) { + conn_call_delete_crypto_aead_ctx(conn, &conn->vneg.rx.ckm->aead_ctx); + ngtcp2_crypto_km_del(conn->vneg.rx.ckm, conn->mem); + conn->vneg.rx.ckm = NULL; + } + + conn_call_delete_crypto_cipher_ctx(conn, &conn->vneg.tx.hp_ctx); + conn->vneg.tx.hp_ctx.native_handle = NULL; + + if (conn->vneg.tx.ckm) { + conn_call_delete_crypto_aead_ctx(conn, &conn->vneg.tx.ckm->aead_ctx); + ngtcp2_crypto_km_del(conn->vneg.tx.ckm, conn->mem); + conn->vneg.tx.ckm = NULL; + } + + rv = ngtcp2_crypto_km_new(&conn->vneg.rx.ckm, NULL, 0, NULL, rx_iv, ivlen, + conn->mem); + if (rv != 0) { + return rv; + } + + rv = ngtcp2_crypto_km_new(&conn->vneg.tx.ckm, NULL, 0, NULL, tx_iv, ivlen, + conn->mem); + if (rv != 0) { + return rv; + } + + /* Take owner ship after we are sure that no failure occurs, so that + caller can delete these contexts on failure. */ + conn->vneg.rx.ckm->aead_ctx = *rx_aead_ctx; + conn->vneg.rx.hp_ctx = *rx_hp_ctx; + conn->vneg.tx.ckm->aead_ctx = *tx_aead_ctx; + conn->vneg.tx.hp_ctx = *tx_hp_ctx; + conn->vneg.version = version; + + return 0; +} + +int ngtcp2_conn_install_rx_handshake_key( + ngtcp2_conn *conn, const ngtcp2_crypto_aead_ctx *aead_ctx, + const uint8_t *iv, size_t ivlen, const ngtcp2_crypto_cipher_ctx *hp_ctx) { + ngtcp2_pktns *pktns = conn->hs_pktns; + int rv; + + assert(ivlen >= 8); + assert(pktns); + assert(!pktns->crypto.rx.hp_ctx.native_handle); + assert(!pktns->crypto.rx.ckm); + + rv = ngtcp2_crypto_km_new(&pktns->crypto.rx.ckm, NULL, 0, aead_ctx, iv, ivlen, + conn->mem); + if (rv != 0) { + return rv; + } + + pktns->crypto.rx.hp_ctx = *hp_ctx; + + rv = conn_call_recv_rx_key(conn, NGTCP2_CRYPTO_LEVEL_HANDSHAKE); + if (rv != 0) { + ngtcp2_crypto_km_del(pktns->crypto.rx.ckm, conn->mem); + pktns->crypto.rx.ckm = NULL; + + memset(&pktns->crypto.rx.hp_ctx, 0, sizeof(pktns->crypto.rx.hp_ctx)); + + return rv; + } + + return 0; +} + +int ngtcp2_conn_install_tx_handshake_key( + ngtcp2_conn *conn, const ngtcp2_crypto_aead_ctx *aead_ctx, + const uint8_t *iv, size_t ivlen, const ngtcp2_crypto_cipher_ctx *hp_ctx) { + ngtcp2_pktns *pktns = conn->hs_pktns; + int rv; + + assert(ivlen >= 8); + assert(pktns); + assert(!pktns->crypto.tx.hp_ctx.native_handle); + assert(!pktns->crypto.tx.ckm); + + rv = ngtcp2_crypto_km_new(&pktns->crypto.tx.ckm, NULL, 0, aead_ctx, iv, ivlen, + conn->mem); + if (rv != 0) { + return rv; + } + + pktns->crypto.tx.hp_ctx = *hp_ctx; + + if (conn->server) { + rv = ngtcp2_conn_commit_local_transport_params(conn); + if (rv != 0) { + return rv; + } + } + + rv = conn_call_recv_tx_key(conn, NGTCP2_CRYPTO_LEVEL_HANDSHAKE); + if (rv != 0) { + ngtcp2_crypto_km_del(pktns->crypto.tx.ckm, conn->mem); + pktns->crypto.tx.ckm = NULL; + + memset(&pktns->crypto.tx.hp_ctx, 0, sizeof(pktns->crypto.tx.hp_ctx)); + + return rv; + } + + return 0; +} + +int ngtcp2_conn_install_early_key(ngtcp2_conn *conn, + const ngtcp2_crypto_aead_ctx *aead_ctx, + const uint8_t *iv, size_t ivlen, + const ngtcp2_crypto_cipher_ctx *hp_ctx) { + int rv; + + assert(ivlen >= 8); + assert(!conn->early.hp_ctx.native_handle); + assert(!conn->early.ckm); + + rv = ngtcp2_crypto_km_new(&conn->early.ckm, NULL, 0, aead_ctx, iv, ivlen, + conn->mem); + if (rv != 0) { + return rv; + } + + conn->early.hp_ctx = *hp_ctx; + + conn->flags |= NGTCP2_CONN_FLAG_EARLY_KEY_INSTALLED; + + if (conn->server) { + rv = conn_call_recv_rx_key(conn, NGTCP2_CRYPTO_LEVEL_EARLY); + } else { + rv = conn_call_recv_tx_key(conn, NGTCP2_CRYPTO_LEVEL_EARLY); + } + if (rv != 0) { + ngtcp2_crypto_km_del(conn->early.ckm, conn->mem); + conn->early.ckm = NULL; + + memset(&conn->early.hp_ctx, 0, sizeof(conn->early.hp_ctx)); + + return rv; + } + + return 0; +} + +int ngtcp2_conn_install_rx_key(ngtcp2_conn *conn, const uint8_t *secret, + size_t secretlen, + const ngtcp2_crypto_aead_ctx *aead_ctx, + const uint8_t *iv, size_t ivlen, + const ngtcp2_crypto_cipher_ctx *hp_ctx) { + ngtcp2_pktns *pktns = &conn->pktns; + int rv; + + assert(ivlen >= 8); + assert(!pktns->crypto.rx.hp_ctx.native_handle); + assert(!pktns->crypto.rx.ckm); + + rv = ngtcp2_crypto_km_new(&pktns->crypto.rx.ckm, secret, secretlen, aead_ctx, + iv, ivlen, conn->mem); + if (rv != 0) { + return rv; + } + + pktns->crypto.rx.hp_ctx = *hp_ctx; + + if (!conn->server) { + if (conn->remote.pending_transport_params) { + ngtcp2_transport_params_del(conn->remote.transport_params, conn->mem); + + conn->remote.transport_params = conn->remote.pending_transport_params; + conn->remote.pending_transport_params = NULL; + conn_sync_stream_id_limit(conn); + conn->tx.max_offset = conn->remote.transport_params->initial_max_data; + } + + if (conn->early.ckm) { + conn_discard_early_key(conn); + } + } + + rv = conn_call_recv_rx_key(conn, NGTCP2_CRYPTO_LEVEL_APPLICATION); + if (rv != 0) { + ngtcp2_crypto_km_del(pktns->crypto.rx.ckm, conn->mem); + pktns->crypto.rx.ckm = NULL; + + memset(&pktns->crypto.rx.hp_ctx, 0, sizeof(pktns->crypto.rx.hp_ctx)); + + return rv; + } + + return 0; +} + +int ngtcp2_conn_install_tx_key(ngtcp2_conn *conn, const uint8_t *secret, + size_t secretlen, + const ngtcp2_crypto_aead_ctx *aead_ctx, + const uint8_t *iv, size_t ivlen, + const ngtcp2_crypto_cipher_ctx *hp_ctx) { + ngtcp2_pktns *pktns = &conn->pktns; + int rv; + + assert(ivlen >= 8); + assert(!pktns->crypto.tx.hp_ctx.native_handle); + assert(!pktns->crypto.tx.ckm); + + rv = ngtcp2_crypto_km_new(&pktns->crypto.tx.ckm, secret, secretlen, aead_ctx, + iv, ivlen, conn->mem); + if (rv != 0) { + return rv; + } + + pktns->crypto.tx.hp_ctx = *hp_ctx; + + if (conn->server) { + if (conn->remote.pending_transport_params) { + ngtcp2_transport_params_del(conn->remote.transport_params, conn->mem); + + conn->remote.transport_params = conn->remote.pending_transport_params; + conn->remote.pending_transport_params = NULL; + conn_sync_stream_id_limit(conn); + conn->tx.max_offset = conn->remote.transport_params->initial_max_data; + } + } else if (conn->early.ckm) { + conn_discard_early_key(conn); + } + + rv = conn_call_recv_tx_key(conn, NGTCP2_CRYPTO_LEVEL_APPLICATION); + if (rv != 0) { + ngtcp2_crypto_km_del(pktns->crypto.tx.ckm, conn->mem); + pktns->crypto.tx.ckm = NULL; + + memset(&pktns->crypto.tx.hp_ctx, 0, sizeof(pktns->crypto.tx.hp_ctx)); + + return rv; + } + + return 0; +} + +int ngtcp2_conn_initiate_key_update(ngtcp2_conn *conn, ngtcp2_tstamp ts) { + ngtcp2_tstamp confirmed_ts = conn->crypto.key_update.confirmed_ts; + ngtcp2_duration pto = conn_compute_pto(conn, &conn->pktns); + + assert(conn->state == NGTCP2_CS_POST_HANDSHAKE); + + if (!(conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_CONFIRMED) || + (conn->flags & NGTCP2_CONN_FLAG_KEY_UPDATE_NOT_CONFIRMED) || + !conn->crypto.key_update.new_tx_ckm || + !conn->crypto.key_update.new_rx_ckm || + (confirmed_ts != UINT64_MAX && confirmed_ts + 3 * pto > ts)) { + return NGTCP2_ERR_INVALID_STATE; + } + + conn_rotate_keys(conn, NGTCP2_MAX_PKT_NUM, /* initiator = */ 1); + + return 0; +} + +/* + * conn_retire_stale_bound_dcid retires stale destination connection + * ID in conn->dcid.bound to keep some unused destination connection + * IDs available. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +static int conn_retire_stale_bound_dcid(ngtcp2_conn *conn, + ngtcp2_duration timeout, + ngtcp2_tstamp ts) { + size_t i; + ngtcp2_dcid *dcid, *last; + int rv; + + for (i = 0; i < ngtcp2_ringbuf_len(&conn->dcid.bound.rb);) { + dcid = ngtcp2_ringbuf_get(&conn->dcid.bound.rb, i); + + assert(dcid->cid.datalen); + + if (dcid->bound_ts + timeout > ts) { + ++i; + continue; + } + + rv = conn_retire_dcid_seq(conn, dcid->seq); + if (rv != 0) { + return rv; + } + + if (i == 0) { + ngtcp2_ringbuf_pop_front(&conn->dcid.bound.rb); + continue; + } + + if (i == ngtcp2_ringbuf_len(&conn->dcid.bound.rb) - 1) { + ngtcp2_ringbuf_pop_back(&conn->dcid.bound.rb); + break; + } + + last = ngtcp2_ringbuf_get(&conn->dcid.bound.rb, + ngtcp2_ringbuf_len(&conn->dcid.bound.rb) - 1); + ngtcp2_dcid_copy(dcid, last); + ngtcp2_ringbuf_pop_back(&conn->dcid.bound.rb); + } + + return 0; +} + +ngtcp2_tstamp ngtcp2_conn_loss_detection_expiry(ngtcp2_conn *conn) { + return conn->cstat.loss_detection_timer; +} + +ngtcp2_tstamp ngtcp2_conn_internal_expiry(ngtcp2_conn *conn) { + ngtcp2_tstamp res = UINT64_MAX, t; + ngtcp2_duration pto = conn_compute_pto(conn, &conn->pktns); + ngtcp2_scid *scid; + ngtcp2_dcid *dcid; + size_t i, len; + + if (conn->pv) { + res = ngtcp2_pv_next_expiry(conn->pv); + } + + if (conn->pmtud) { + res = ngtcp2_min(res, conn->pmtud->expiry); + } + + if (!ngtcp2_pq_empty(&conn->scid.used)) { + scid = ngtcp2_struct_of(ngtcp2_pq_top(&conn->scid.used), ngtcp2_scid, pe); + if (scid->retired_ts != UINT64_MAX) { + t = scid->retired_ts + pto; + res = ngtcp2_min(res, t); + } + } + + if (ngtcp2_ringbuf_len(&conn->dcid.retired.rb)) { + dcid = ngtcp2_ringbuf_get(&conn->dcid.retired.rb, 0); + t = dcid->retired_ts + pto; + res = ngtcp2_min(res, t); + } + + if (conn->dcid.current.cid.datalen) { + len = ngtcp2_ringbuf_len(&conn->dcid.bound.rb); + for (i = 0; i < len; ++i) { + dcid = ngtcp2_ringbuf_get(&conn->dcid.bound.rb, i); + + assert(dcid->cid.datalen); + assert(dcid->bound_ts != UINT64_MAX); + + t = dcid->bound_ts + 3 * pto; + res = ngtcp2_min(res, t); + } + } + + if (conn->server && conn->early.ckm && + conn->early.discard_started_ts != UINT64_MAX) { + t = conn->early.discard_started_ts + 3 * pto; + res = ngtcp2_min(res, t); + } + + return res; +} + +ngtcp2_tstamp ngtcp2_conn_ack_delay_expiry(ngtcp2_conn *conn) { + ngtcp2_acktr *acktr = &conn->pktns.acktr; + + if (!(acktr->flags & NGTCP2_ACKTR_FLAG_CANCEL_TIMER) && + acktr->first_unacked_ts != UINT64_MAX) { + return acktr->first_unacked_ts + conn_compute_ack_delay(conn); + } + return UINT64_MAX; +} + +static ngtcp2_tstamp conn_handshake_expiry(ngtcp2_conn *conn) { + if (conn_is_handshake_completed(conn) || + conn->local.settings.handshake_timeout == UINT64_MAX) { + return UINT64_MAX; + } + + return conn->local.settings.initial_ts + + conn->local.settings.handshake_timeout; +} + +ngtcp2_tstamp ngtcp2_conn_get_expiry(ngtcp2_conn *conn) { + ngtcp2_tstamp t1 = ngtcp2_conn_loss_detection_expiry(conn); + ngtcp2_tstamp t2 = ngtcp2_conn_ack_delay_expiry(conn); + ngtcp2_tstamp t3 = ngtcp2_conn_internal_expiry(conn); + ngtcp2_tstamp t4 = ngtcp2_conn_lost_pkt_expiry(conn); + ngtcp2_tstamp t5 = conn_keep_alive_expiry(conn); + ngtcp2_tstamp t6 = conn_handshake_expiry(conn); + ngtcp2_tstamp t7 = ngtcp2_conn_get_idle_expiry(conn); + ngtcp2_tstamp res = ngtcp2_min(t1, t2); + res = ngtcp2_min(res, t3); + res = ngtcp2_min(res, t4); + res = ngtcp2_min(res, t5); + res = ngtcp2_min(res, t6); + res = ngtcp2_min(res, t7); + return ngtcp2_min(res, conn->tx.pacing.next_ts); +} + +int ngtcp2_conn_handle_expiry(ngtcp2_conn *conn, ngtcp2_tstamp ts) { + int rv; + ngtcp2_duration pto = conn_compute_pto(conn, &conn->pktns); + + assert(!(conn->flags & NGTCP2_CONN_FLAG_PPE_PENDING)); + + if (ngtcp2_conn_get_idle_expiry(conn) <= ts) { + return NGTCP2_ERR_IDLE_CLOSE; + } + + ngtcp2_conn_cancel_expired_ack_delay_timer(conn, ts); + + conn_cancel_expired_keep_alive_timer(conn, ts); + + conn_cancel_expired_pkt_tx_timer(conn, ts); + + ngtcp2_conn_remove_lost_pkt(conn, ts); + + if (conn->pv) { + ngtcp2_pv_cancel_expired_timer(conn->pv, ts); + } + + if (conn->pmtud) { + ngtcp2_pmtud_handle_expiry(conn->pmtud, ts); + if (ngtcp2_pmtud_finished(conn->pmtud)) { + ngtcp2_conn_stop_pmtud(conn); + } + } + + if (ngtcp2_conn_loss_detection_expiry(conn) <= ts) { + rv = ngtcp2_conn_on_loss_detection_timer(conn, ts); + if (rv != 0) { + return rv; + } + } + + if (conn->dcid.current.cid.datalen) { + rv = conn_retire_stale_bound_dcid(conn, 3 * pto, ts); + if (rv != 0) { + return rv; + } + } + + rv = conn_remove_retired_connection_id(conn, pto, ts); + if (rv != 0) { + return rv; + } + + if (conn->server && conn->early.ckm && + conn->early.discard_started_ts != UINT64_MAX) { + if (conn->early.discard_started_ts + 3 * pto <= ts) { + conn_discard_early_key(conn); + } + } + + if (!conn_is_handshake_completed(conn) && + conn->local.settings.handshake_timeout != UINT64_MAX && + conn->local.settings.initial_ts + + conn->local.settings.handshake_timeout <= + ts) { + return NGTCP2_ERR_HANDSHAKE_TIMEOUT; + } + + return 0; +} + +static void acktr_cancel_expired_ack_delay_timer(ngtcp2_acktr *acktr, + ngtcp2_duration max_ack_delay, + ngtcp2_tstamp ts) { + if (!(acktr->flags & NGTCP2_ACKTR_FLAG_CANCEL_TIMER) && + acktr->first_unacked_ts != UINT64_MAX && + acktr->first_unacked_ts + max_ack_delay <= ts) { + acktr->flags |= NGTCP2_ACKTR_FLAG_CANCEL_TIMER; + } +} + +void ngtcp2_conn_cancel_expired_ack_delay_timer(ngtcp2_conn *conn, + ngtcp2_tstamp ts) { + ngtcp2_duration ack_delay = conn_compute_ack_delay(conn); + + if (conn->in_pktns) { + acktr_cancel_expired_ack_delay_timer(&conn->in_pktns->acktr, 0, ts); + } + if (conn->hs_pktns) { + acktr_cancel_expired_ack_delay_timer(&conn->hs_pktns->acktr, 0, ts); + } + acktr_cancel_expired_ack_delay_timer(&conn->pktns.acktr, ack_delay, ts); +} + +ngtcp2_tstamp ngtcp2_conn_lost_pkt_expiry(ngtcp2_conn *conn) { + ngtcp2_tstamp res = UINT64_MAX, ts; + + if (conn->in_pktns) { + ts = ngtcp2_rtb_lost_pkt_ts(&conn->in_pktns->rtb); + if (ts != UINT64_MAX) { + ts += conn_compute_pto(conn, conn->in_pktns); + res = ngtcp2_min(res, ts); + } + } + + if (conn->hs_pktns) { + ts = ngtcp2_rtb_lost_pkt_ts(&conn->hs_pktns->rtb); + if (ts != UINT64_MAX) { + ts += conn_compute_pto(conn, conn->hs_pktns); + res = ngtcp2_min(res, ts); + } + } + + ts = ngtcp2_rtb_lost_pkt_ts(&conn->pktns.rtb); + if (ts != UINT64_MAX) { + ts += conn_compute_pto(conn, &conn->pktns); + res = ngtcp2_min(res, ts); + } + + return res; +} + +void ngtcp2_conn_remove_lost_pkt(ngtcp2_conn *conn, ngtcp2_tstamp ts) { + ngtcp2_duration pto; + + if (conn->in_pktns) { + pto = conn_compute_pto(conn, conn->in_pktns); + ngtcp2_rtb_remove_expired_lost_pkt(&conn->in_pktns->rtb, pto, ts); + } + if (conn->hs_pktns) { + pto = conn_compute_pto(conn, conn->hs_pktns); + ngtcp2_rtb_remove_expired_lost_pkt(&conn->hs_pktns->rtb, pto, ts); + } + pto = conn_compute_pto(conn, &conn->pktns); + ngtcp2_rtb_remove_expired_lost_pkt(&conn->pktns.rtb, pto, ts); +} + +/* + * select_preferred_version selects the most preferred version. + * |fallback_version| is chosen if no preference is made, or + * |preferred_versions| does not include any of |chosen_version| or + * |available_versions|. |chosen_version| is treated as an extra + * other version. + */ +static uint32_t select_preferred_version(const uint32_t *preferred_versions, + size_t preferred_versionslen, + uint32_t chosen_version, + const uint8_t *available_versions, + size_t available_versionslen, + uint32_t fallback_version) { + size_t i, j; + const uint8_t *p; + uint32_t v; + + if (!preferred_versionslen || + (!available_versionslen && chosen_version == fallback_version)) { + return fallback_version; + } + + for (i = 0; i < preferred_versionslen; ++i) { + if (preferred_versions[i] == chosen_version) { + return chosen_version; + } + for (j = 0, p = available_versions; j < available_versionslen; + j += sizeof(uint32_t)) { + p = ngtcp2_get_uint32(&v, p); + + if (preferred_versions[i] == v) { + return v; + } + } + } + + return fallback_version; +} + +/* + * conn_client_validate_transport_params validates |params| as client. + * |params| must be sent with Encrypted Extensions. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_TRANSPORT_PARAM + * params contains preferred address but server chose zero-length + * connection ID. + * NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE + * Validation against version negotiation parameters failed. + */ +static int +conn_client_validate_transport_params(ngtcp2_conn *conn, + const ngtcp2_transport_params *params) { + if (!ngtcp2_cid_eq(&conn->rcid, ¶ms->original_dcid)) { + return NGTCP2_ERR_TRANSPORT_PARAM; + } + + if (conn->flags & NGTCP2_CONN_FLAG_RECV_RETRY) { + if (!params->retry_scid_present) { + return NGTCP2_ERR_TRANSPORT_PARAM; + } + if (!ngtcp2_cid_eq(&conn->retry_scid, ¶ms->retry_scid)) { + return NGTCP2_ERR_TRANSPORT_PARAM; + } + } else if (params->retry_scid_present) { + return NGTCP2_ERR_TRANSPORT_PARAM; + } + + if (params->preferred_address_present && + conn->dcid.current.cid.datalen == 0) { + return NGTCP2_ERR_TRANSPORT_PARAM; + } + + if (params->version_info_present) { + if (conn->negotiated_version != params->version_info.chosen_version) { + return NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE; + } + + assert(vneg_available_versions_includes(conn->vneg.available_versions, + conn->vneg.available_versionslen, + conn->negotiated_version)); + } else if (conn->client_chosen_version != conn->negotiated_version) { + return NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE; + } + + /* When client reacted upon Version Negotiation */ + if (conn->local.settings.original_version != conn->client_chosen_version) { + if (!params->version_info_present) { + assert(conn->client_chosen_version == conn->negotiated_version); + + /* QUIC v1 (and the supported draft versions) are treated + specially. If version_info is missing, no further validation + is necessary. + https://datatracker.ietf.org/doc/html/draft-ietf-quic-version-negotiation-10#section-8 + */ + if (conn->client_chosen_version == NGTCP2_PROTO_VER_V1 || + (NGTCP2_PROTO_VER_DRAFT_MIN <= conn->client_chosen_version && + conn->client_chosen_version <= NGTCP2_PROTO_VER_DRAFT_MAX)) { + return 0; + } + + return NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE; + } + + /* Server choose original version after Version Negotiation. + Draft does not say this particular case, but this smells like + misbehaved server because server should accept original_version + in the original connection. */ + if (conn->local.settings.original_version == + params->version_info.chosen_version) { + return NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE; + } + + /* Check version downgrade on incompatible version negotiation. */ + if (params->version_info.available_versionslen == 0) { + return NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE; + } + + if (conn->client_chosen_version != + select_preferred_version(conn->vneg.preferred_versions, + conn->vneg.preferred_versionslen, + params->version_info.chosen_version, + params->version_info.available_versions, + params->version_info.available_versionslen, + /* fallback_version = */ 0)) { + return NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE; + } + } + + return 0; +} + +uint32_t +ngtcp2_conn_server_negotiate_version(ngtcp2_conn *conn, + const ngtcp2_version_info *version_info) { + assert(conn->server); + assert(conn->client_chosen_version == version_info->chosen_version); + + return select_preferred_version( + conn->vneg.preferred_versions, conn->vneg.preferred_versionslen, + version_info->chosen_version, version_info->available_versions, + version_info->available_versionslen, version_info->chosen_version); +} + +int ngtcp2_conn_set_remote_transport_params( + ngtcp2_conn *conn, const ngtcp2_transport_params *params) { + int rv; + + /* We expect this function is called once per QUIC connection, but + GnuTLS server seems to call TLS extension callback twice if it + sends HelloRetryRequest. In practice, same QUIC transport + parameters are sent in the 2nd client flight, just returning 0 + would cause no harm. */ + if (conn->flags & NGTCP2_CONN_FLAG_TRANSPORT_PARAM_RECVED) { + return 0; + } + + /* Assume that ngtcp2_decode_transport_params sets default value if + active_connection_id_limit is omitted. */ + if (params->active_connection_id_limit < + NGTCP2_DEFAULT_ACTIVE_CONNECTION_ID_LIMIT) { + return NGTCP2_ERR_TRANSPORT_PARAM; + } + + /* We assume that conn->dcid.current.cid is still the initial one. + This requires that transport parameter must be fed into + ngtcp2_conn as early as possible. */ + if (!ngtcp2_cid_eq(&conn->dcid.current.cid, ¶ms->initial_scid)) { + return NGTCP2_ERR_TRANSPORT_PARAM; + } + + if (params->max_udp_payload_size < NGTCP2_MAX_UDP_PAYLOAD_SIZE) { + return NGTCP2_ERR_TRANSPORT_PARAM; + } + + if (conn->server) { + if (params->version_info_present) { + if (!vneg_available_versions_includes( + params->version_info.available_versions, + params->version_info.available_versionslen, + params->version_info.chosen_version)) { + return NGTCP2_ERR_TRANSPORT_PARAM; + } + + if (params->version_info.chosen_version != conn->client_chosen_version) { + return NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE; + } + + conn->negotiated_version = + ngtcp2_conn_server_negotiate_version(conn, ¶ms->version_info); + if (conn->negotiated_version != conn->client_chosen_version) { + rv = conn_call_version_negotiation(conn, conn->negotiated_version, + &conn->rcid); + if (rv != 0) { + return rv; + } + } + } else { + conn->negotiated_version = conn->client_chosen_version; + } + + conn->local.transport_params.version_info.chosen_version = + conn->negotiated_version; + + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, + "the negotiated version is 0x%08x", + conn->negotiated_version); + } else { + rv = conn_client_validate_transport_params(conn, params); + if (rv != 0) { + return rv; + } + } + + ngtcp2_log_remote_tp(&conn->log, + conn->server + ? NGTCP2_TRANSPORT_PARAMS_TYPE_CLIENT_HELLO + : NGTCP2_TRANSPORT_PARAMS_TYPE_ENCRYPTED_EXTENSIONS, + params); + + ngtcp2_qlog_parameters_set_transport_params(&conn->qlog, params, conn->server, + NGTCP2_QLOG_SIDE_REMOTE); + + if ((conn->server && conn->pktns.crypto.tx.ckm) || + (!conn->server && conn->pktns.crypto.rx.ckm)) { + ngtcp2_transport_params_del(conn->remote.transport_params, conn->mem); + conn->remote.transport_params = NULL; + + rv = ngtcp2_transport_params_copy_new(&conn->remote.transport_params, + params, conn->mem); + if (rv != 0) { + return rv; + } + conn_sync_stream_id_limit(conn); + conn->tx.max_offset = conn->remote.transport_params->initial_max_data; + } else { + assert(!conn->remote.pending_transport_params); + + rv = ngtcp2_transport_params_copy_new( + &conn->remote.pending_transport_params, params, conn->mem); + if (rv != 0) { + return rv; + } + } + + conn->flags |= NGTCP2_CONN_FLAG_TRANSPORT_PARAM_RECVED; + + return 0; +} + +int ngtcp2_conn_decode_remote_transport_params(ngtcp2_conn *conn, + const uint8_t *data, + size_t datalen) { + ngtcp2_transport_params params; + int rv; + + rv = ngtcp2_decode_transport_params( + ¶ms, + conn->server ? NGTCP2_TRANSPORT_PARAMS_TYPE_CLIENT_HELLO + : NGTCP2_TRANSPORT_PARAMS_TYPE_ENCRYPTED_EXTENSIONS, + data, datalen); + if (rv != 0) { + return rv; + } + + return ngtcp2_conn_set_remote_transport_params(conn, ¶ms); +} + +const ngtcp2_transport_params * +ngtcp2_conn_get_remote_transport_params(ngtcp2_conn *conn) { + if (conn->remote.pending_transport_params) { + return conn->remote.pending_transport_params; + } + + return conn->remote.transport_params; +} + +void ngtcp2_conn_set_early_remote_transport_params_versioned( + ngtcp2_conn *conn, int transport_params_version, + const ngtcp2_transport_params *params) { + ngtcp2_transport_params *p; + (void)transport_params_version; + + assert(!conn->server); + assert(!conn->remote.transport_params); + + /* Assume that all pointer fields in p are NULL */ + p = ngtcp2_mem_calloc(conn->mem, 1, sizeof(*p)); + + conn->remote.transport_params = p; + + p->initial_max_streams_bidi = params->initial_max_streams_bidi; + p->initial_max_streams_uni = params->initial_max_streams_uni; + p->initial_max_stream_data_bidi_local = + params->initial_max_stream_data_bidi_local; + p->initial_max_stream_data_bidi_remote = + params->initial_max_stream_data_bidi_remote; + p->initial_max_stream_data_uni = params->initial_max_stream_data_uni; + p->initial_max_data = params->initial_max_data; + p->active_connection_id_limit = + ngtcp2_max(NGTCP2_DEFAULT_ACTIVE_CONNECTION_ID_LIMIT, + params->active_connection_id_limit); + p->max_idle_timeout = params->max_idle_timeout; + if (!params->max_udp_payload_size) { + p->max_udp_payload_size = NGTCP2_DEFAULT_MAX_RECV_UDP_PAYLOAD_SIZE; + } else { + p->max_udp_payload_size = + ngtcp2_max(NGTCP2_MAX_UDP_PAYLOAD_SIZE, params->max_udp_payload_size); + } + p->disable_active_migration = params->disable_active_migration; + p->max_datagram_frame_size = params->max_datagram_frame_size; + + /* These parameters are treated specially. If server accepts early + data, it must not set values for these parameters that are + smaller than these remembered values. */ + conn->early.transport_params.initial_max_streams_bidi = + params->initial_max_streams_bidi; + conn->early.transport_params.initial_max_streams_uni = + params->initial_max_streams_uni; + conn->early.transport_params.initial_max_stream_data_bidi_local = + params->initial_max_stream_data_bidi_local; + conn->early.transport_params.initial_max_stream_data_bidi_remote = + params->initial_max_stream_data_bidi_remote; + conn->early.transport_params.initial_max_stream_data_uni = + params->initial_max_stream_data_uni; + conn->early.transport_params.initial_max_data = params->initial_max_data; + conn->early.transport_params.active_connection_id_limit = + params->active_connection_id_limit; + conn->early.transport_params.max_datagram_frame_size = + params->max_datagram_frame_size; + + conn_sync_stream_id_limit(conn); + + conn->tx.max_offset = p->initial_max_data; + + ngtcp2_qlog_parameters_set_transport_params(&conn->qlog, p, conn->server, + NGTCP2_QLOG_SIDE_REMOTE); +} + +int ngtcp2_conn_set_local_transport_params_versioned( + ngtcp2_conn *conn, int transport_params_version, + const ngtcp2_transport_params *params) { + (void)transport_params_version; + + assert(conn->server); + assert(params->active_connection_id_limit <= NGTCP2_MAX_DCID_POOL_SIZE); + + if (conn->hs_pktns == NULL || conn->hs_pktns->crypto.tx.ckm) { + return NGTCP2_ERR_INVALID_STATE; + } + + conn_set_local_transport_params(conn, params); + + return 0; +} + +int ngtcp2_conn_commit_local_transport_params(ngtcp2_conn *conn) { + const ngtcp2_mem *mem = conn->mem; + ngtcp2_transport_params *params = &conn->local.transport_params; + ngtcp2_scid *scident; + int rv; + + assert(1 == ngtcp2_ksl_len(&conn->scid.set)); + + if (params->active_connection_id_limit == 0) { + params->active_connection_id_limit = + NGTCP2_DEFAULT_ACTIVE_CONNECTION_ID_LIMIT; + } + + params->initial_scid = conn->oscid; + + if (conn->oscid.datalen == 0) { + params->preferred_address_present = 0; + } + + if (conn->server && params->preferred_address_present) { + scident = ngtcp2_mem_malloc(mem, sizeof(*scident)); + if (scident == NULL) { + return NGTCP2_ERR_NOMEM; + } + + ngtcp2_scid_init(scident, 1, ¶ms->preferred_address.cid); + + rv = ngtcp2_ksl_insert(&conn->scid.set, NULL, &scident->cid, scident); + if (rv != 0) { + ngtcp2_mem_free(mem, scident); + return rv; + } + + conn->scid.last_seq = 1; + } + + conn->rx.window = conn->rx.unsent_max_offset = conn->rx.max_offset = + params->initial_max_data; + conn->remote.bidi.unsent_max_streams = params->initial_max_streams_bidi; + conn->remote.bidi.max_streams = params->initial_max_streams_bidi; + conn->remote.uni.unsent_max_streams = params->initial_max_streams_uni; + conn->remote.uni.max_streams = params->initial_max_streams_uni; + + conn->flags |= NGTCP2_CONN_FLAG_LOCAL_TRANSPORT_PARAMS_COMMITTED; + + ngtcp2_qlog_parameters_set_transport_params(&conn->qlog, params, conn->server, + NGTCP2_QLOG_SIDE_LOCAL); + + return 0; +} + +const ngtcp2_transport_params * +ngtcp2_conn_get_local_transport_params(ngtcp2_conn *conn) { + return &conn->local.transport_params; +} + +ngtcp2_ssize ngtcp2_conn_encode_local_transport_params(ngtcp2_conn *conn, + uint8_t *dest, + size_t destlen) { + return ngtcp2_encode_transport_params( + dest, destlen, + conn->server ? NGTCP2_TRANSPORT_PARAMS_TYPE_ENCRYPTED_EXTENSIONS + : NGTCP2_TRANSPORT_PARAMS_TYPE_CLIENT_HELLO, + &conn->local.transport_params); +} + +int ngtcp2_conn_open_bidi_stream(ngtcp2_conn *conn, int64_t *pstream_id, + void *stream_user_data) { + int rv; + ngtcp2_strm *strm; + + if (ngtcp2_conn_get_streams_bidi_left(conn) == 0) { + return NGTCP2_ERR_STREAM_ID_BLOCKED; + } + + strm = ngtcp2_objalloc_strm_get(&conn->strm_objalloc); + if (strm == NULL) { + return NGTCP2_ERR_NOMEM; + } + + rv = ngtcp2_conn_init_stream(conn, strm, conn->local.bidi.next_stream_id, + stream_user_data); + if (rv != 0) { + ngtcp2_objalloc_strm_release(&conn->strm_objalloc, strm); + return rv; + } + + *pstream_id = conn->local.bidi.next_stream_id; + conn->local.bidi.next_stream_id += 4; + + return 0; +} + +int ngtcp2_conn_open_uni_stream(ngtcp2_conn *conn, int64_t *pstream_id, + void *stream_user_data) { + int rv; + ngtcp2_strm *strm; + + if (ngtcp2_conn_get_streams_uni_left(conn) == 0) { + return NGTCP2_ERR_STREAM_ID_BLOCKED; + } + + strm = ngtcp2_objalloc_strm_get(&conn->strm_objalloc); + if (strm == NULL) { + return NGTCP2_ERR_NOMEM; + } + + rv = ngtcp2_conn_init_stream(conn, strm, conn->local.uni.next_stream_id, + stream_user_data); + if (rv != 0) { + ngtcp2_objalloc_strm_release(&conn->strm_objalloc, strm); + return rv; + } + ngtcp2_strm_shutdown(strm, NGTCP2_STRM_FLAG_SHUT_RD); + + *pstream_id = conn->local.uni.next_stream_id; + conn->local.uni.next_stream_id += 4; + + return 0; +} + +ngtcp2_strm *ngtcp2_conn_find_stream(ngtcp2_conn *conn, int64_t stream_id) { + return ngtcp2_map_find(&conn->strms, (uint64_t)stream_id); +} + +ngtcp2_ssize ngtcp2_conn_write_stream_versioned( + ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version, + ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, ngtcp2_ssize *pdatalen, + uint32_t flags, int64_t stream_id, const uint8_t *data, size_t datalen, + ngtcp2_tstamp ts) { + ngtcp2_vec datav; + + datav.len = datalen; + datav.base = (uint8_t *)data; + + return ngtcp2_conn_writev_stream_versioned(conn, path, pkt_info_version, pi, + dest, destlen, pdatalen, flags, + stream_id, &datav, 1, ts); +} + +static ngtcp2_ssize conn_write_vmsg_wrapper(ngtcp2_conn *conn, + ngtcp2_path *path, + int pkt_info_version, + ngtcp2_pkt_info *pi, uint8_t *dest, + size_t destlen, ngtcp2_vmsg *vmsg, + ngtcp2_tstamp ts) { + ngtcp2_conn_stat *cstat = &conn->cstat; + ngtcp2_ssize nwrite; + int undersized; + + nwrite = ngtcp2_conn_write_vmsg(conn, path, pkt_info_version, pi, dest, + destlen, vmsg, ts); + if (nwrite < 0) { + return nwrite; + } + + if (cstat->bytes_in_flight >= cstat->cwnd) { + conn->rst.is_cwnd_limited = 1; + } + + if (vmsg == NULL && cstat->bytes_in_flight < cstat->cwnd && + conn->tx.strmq_nretrans == 0) { + if (conn->local.settings.no_tx_udp_payload_size_shaping) { + undersized = + (size_t)nwrite < conn->local.settings.max_tx_udp_payload_size; + } else { + undersized = (size_t)nwrite < conn->dcid.current.max_udp_payload_size; + } + + if (undersized) { + conn->rst.app_limited = conn->rst.delivered + cstat->bytes_in_flight; + + if (conn->rst.app_limited == 0) { + conn->rst.app_limited = cstat->max_tx_udp_payload_size; + } + } + } + + return nwrite; +} + +ngtcp2_ssize ngtcp2_conn_writev_stream_versioned( + ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version, + ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, ngtcp2_ssize *pdatalen, + uint32_t flags, int64_t stream_id, const ngtcp2_vec *datav, size_t datavcnt, + ngtcp2_tstamp ts) { + ngtcp2_vmsg vmsg, *pvmsg; + ngtcp2_strm *strm; + int64_t datalen; + + if (pdatalen) { + *pdatalen = -1; + } + + if (stream_id != -1) { + strm = ngtcp2_conn_find_stream(conn, stream_id); + if (strm == NULL) { + return NGTCP2_ERR_STREAM_NOT_FOUND; + } + + if (strm->flags & NGTCP2_STRM_FLAG_SHUT_WR) { + return NGTCP2_ERR_STREAM_SHUT_WR; + } + + datalen = ngtcp2_vec_len_varint(datav, datavcnt); + if (datalen == -1) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + if ((uint64_t)datalen > NGTCP2_MAX_VARINT - strm->tx.offset || + (uint64_t)datalen > NGTCP2_MAX_VARINT - conn->tx.offset) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + vmsg.type = NGTCP2_VMSG_TYPE_STREAM; + vmsg.stream.strm = strm; + vmsg.stream.flags = flags; + vmsg.stream.data = datav; + vmsg.stream.datacnt = datavcnt; + vmsg.stream.pdatalen = pdatalen; + + pvmsg = &vmsg; + } else { + pvmsg = NULL; + } + + return conn_write_vmsg_wrapper(conn, path, pkt_info_version, pi, dest, + destlen, pvmsg, ts); +} + +ngtcp2_ssize ngtcp2_conn_writev_datagram_versioned( + ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version, + ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, int *paccepted, + uint32_t flags, uint64_t dgram_id, const ngtcp2_vec *datav, size_t datavcnt, + ngtcp2_tstamp ts) { + ngtcp2_vmsg vmsg; + int64_t datalen; + + if (paccepted) { + *paccepted = 0; + } + + if (conn->remote.transport_params == NULL || + conn->remote.transport_params->max_datagram_frame_size == 0) { + return NGTCP2_ERR_INVALID_STATE; + } + + datalen = ngtcp2_vec_len_varint(datav, datavcnt); + if (datalen == -1 +#if UINT64_MAX > SIZE_MAX + || (uint64_t)datalen > SIZE_MAX +#endif /* UINT64_MAX > SIZE_MAX */ + ) { + return NGTCP2_ERR_INVALID_STATE; + } + + if (conn->remote.transport_params->max_datagram_frame_size < + ngtcp2_pkt_datagram_framelen((size_t)datalen)) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + vmsg.type = NGTCP2_VMSG_TYPE_DATAGRAM; + vmsg.datagram.dgram_id = dgram_id; + vmsg.datagram.flags = flags; + vmsg.datagram.data = datav; + vmsg.datagram.datacnt = datavcnt; + vmsg.datagram.paccepted = paccepted; + + return conn_write_vmsg_wrapper(conn, path, pkt_info_version, pi, dest, + destlen, &vmsg, ts); +} + +ngtcp2_ssize ngtcp2_conn_write_vmsg(ngtcp2_conn *conn, ngtcp2_path *path, + int pkt_info_version, ngtcp2_pkt_info *pi, + uint8_t *dest, size_t destlen, + ngtcp2_vmsg *vmsg, ngtcp2_tstamp ts) { + ngtcp2_ssize nwrite; + size_t origlen; + size_t origdestlen = destlen; + int rv; + uint8_t wflags = NGTCP2_WRITE_PKT_FLAG_NONE; + int ppe_pending = (conn->flags & NGTCP2_CONN_FLAG_PPE_PENDING) != 0; + ngtcp2_conn_stat *cstat = &conn->cstat; + ngtcp2_ssize res = 0; + uint64_t server_tx_left; + uint64_t datalen; + uint64_t write_datalen = 0; + int64_t prev_in_pkt_num = -1; + ngtcp2_ksl_it it; + ngtcp2_rtb_entry *rtbent; + (void)pkt_info_version; + + conn->log.last_ts = ts; + conn->qlog.last_ts = ts; + + if (path) { + ngtcp2_path_copy(path, &conn->dcid.current.ps.path); + } + + origlen = destlen = + conn_shape_udp_payload(conn, &conn->dcid.current, destlen); + + if (!ppe_pending && pi) { + pi->ecn = NGTCP2_ECN_NOT_ECT; + } + + switch (conn->state) { + case NGTCP2_CS_CLIENT_INITIAL: + case NGTCP2_CS_CLIENT_WAIT_HANDSHAKE: + case NGTCP2_CS_CLIENT_TLS_HANDSHAKE_FAILED: + if (!conn_pacing_pkt_tx_allowed(conn, ts)) { + assert(!ppe_pending); + + return conn_write_handshake_ack_pkts(conn, pi, dest, origlen, ts); + } + + nwrite = conn_client_write_handshake(conn, pi, dest, destlen, vmsg, ts); + /* We might be unable to write a packet because of depletion of + congestion window budget, perhaps due to packet loss that + shrinks the window drastically. */ + if (nwrite <= 0) { + return nwrite; + } + if (conn->state != NGTCP2_CS_POST_HANDSHAKE) { + return nwrite; + } + + assert(nwrite); + assert(dest[0] & NGTCP2_HEADER_FORM_BIT); + assert(conn->negotiated_version); + + if (ngtcp2_pkt_get_type_long(conn->negotiated_version, dest[0]) == + NGTCP2_PKT_INITIAL) { + /* We have added padding already, but in that case, there is no + space left to write 1RTT packet. */ + wflags |= NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING; + } + + res = nwrite; + dest += nwrite; + destlen -= (size_t)nwrite; + /* Break here so that we can coalesces 1RTT packet. */ + break; + case NGTCP2_CS_SERVER_INITIAL: + case NGTCP2_CS_SERVER_WAIT_HANDSHAKE: + case NGTCP2_CS_SERVER_TLS_HANDSHAKE_FAILED: + if (!conn_pacing_pkt_tx_allowed(conn, ts)) { + assert(!ppe_pending); + + if (!(conn->dcid.current.flags & NGTCP2_DCID_FLAG_PATH_VALIDATED)) { + server_tx_left = conn_server_tx_left(conn, &conn->dcid.current); + if (server_tx_left == 0) { + return 0; + } + + origlen = (size_t)ngtcp2_min((uint64_t)origlen, server_tx_left); + } + + return conn_write_handshake_ack_pkts(conn, pi, dest, origlen, ts); + } + + if (!ppe_pending) { + if (!(conn->dcid.current.flags & NGTCP2_DCID_FLAG_PATH_VALIDATED)) { + server_tx_left = conn_server_tx_left(conn, &conn->dcid.current); + if (server_tx_left == 0) { + if (cstat->loss_detection_timer != UINT64_MAX) { + ngtcp2_log_info( + &conn->log, NGTCP2_LOG_EVENT_RCV, + "loss detection timer canceled due to amplification limit"); + cstat->loss_detection_timer = UINT64_MAX; + } + + return 0; + } + + destlen = (size_t)ngtcp2_min((uint64_t)destlen, server_tx_left); + } + + if (vmsg) { + switch (vmsg->type) { + case NGTCP2_VMSG_TYPE_STREAM: + datalen = ngtcp2_vec_len(vmsg->stream.data, vmsg->stream.datacnt); + if (datalen == 0 || (datalen > 0 && + (vmsg->stream.strm->tx.max_offset - + vmsg->stream.strm->tx.offset) && + (conn->tx.max_offset - conn->tx.offset))) { + write_datalen = + conn_enforce_flow_control(conn, vmsg->stream.strm, datalen); + write_datalen = + ngtcp2_min(write_datalen, NGTCP2_MIN_COALESCED_PAYLOADLEN); + write_datalen += NGTCP2_STREAM_OVERHEAD; + } + break; + case NGTCP2_VMSG_TYPE_DATAGRAM: + write_datalen = + ngtcp2_vec_len(vmsg->datagram.data, vmsg->datagram.datacnt) + + NGTCP2_DATAGRAM_OVERHEAD; + break; + default: + ngtcp2_unreachable(); + } + + if (conn->in_pktns && write_datalen > 0) { + it = ngtcp2_rtb_head(&conn->in_pktns->rtb); + if (!ngtcp2_ksl_it_end(&it)) { + rtbent = ngtcp2_ksl_it_get(&it); + prev_in_pkt_num = rtbent->hd.pkt_num; + } + } + } + + nwrite = conn_write_handshake(conn, pi, dest, destlen, write_datalen, ts); + if (nwrite < 0) { + return nwrite; + } + + res = nwrite; + dest += nwrite; + destlen -= (size_t)nwrite; + + if (conn->in_pktns && write_datalen > 0) { + it = ngtcp2_rtb_head(&conn->in_pktns->rtb); + if (!ngtcp2_ksl_it_end(&it)) { + rtbent = ngtcp2_ksl_it_get(&it); + if (rtbent->hd.pkt_num != prev_in_pkt_num && + (rtbent->flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING)) { + /* We have added padding already, but in that case, there + is no space left to write 1RTT packet. */ + wflags |= NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING; + } + } + } + } + if (conn->state != NGTCP2_CS_POST_HANDSHAKE && + conn->pktns.crypto.tx.ckm == NULL) { + return res; + } + break; + case NGTCP2_CS_POST_HANDSHAKE: + if (!conn_pacing_pkt_tx_allowed(conn, ts)) { + assert(!ppe_pending); + + if (conn->server && + !(conn->dcid.current.flags & NGTCP2_DCID_FLAG_PATH_VALIDATED)) { + server_tx_left = conn_server_tx_left(conn, &conn->dcid.current); + if (server_tx_left == 0) { + return 0; + } + + origlen = (size_t)ngtcp2_min((uint64_t)origlen, server_tx_left); + } + + return conn_write_ack_pkt(conn, pi, dest, origlen, NGTCP2_PKT_1RTT, ts); + } + + break; + case NGTCP2_CS_CLOSING: + return NGTCP2_ERR_CLOSING; + case NGTCP2_CS_DRAINING: + return NGTCP2_ERR_DRAINING; + default: + return 0; + } + + assert(conn->pktns.crypto.tx.ckm); + + if (conn_check_pkt_num_exhausted(conn)) { + return NGTCP2_ERR_PKT_NUM_EXHAUSTED; + } + + if (vmsg) { + switch (vmsg->type) { + case NGTCP2_VMSG_TYPE_STREAM: + if (vmsg->stream.flags & NGTCP2_WRITE_STREAM_FLAG_MORE) { + wflags |= NGTCP2_WRITE_PKT_FLAG_MORE; + } + break; + case NGTCP2_VMSG_TYPE_DATAGRAM: + if (vmsg->datagram.flags & NGTCP2_WRITE_DATAGRAM_FLAG_MORE) { + wflags |= NGTCP2_WRITE_PKT_FLAG_MORE; + } + break; + default: + break; + } + } + + if (ppe_pending) { + res = conn->pkt.hs_spktlen; + conn->pkt.hs_spktlen = 0; + if (conn->pkt.require_padding) { + wflags |= NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING; + } + /* dest and destlen have already been adjusted in ppe in the first + run. They are adjusted for probe packet later. */ + nwrite = conn_write_pkt(conn, pi, dest, destlen, vmsg, NGTCP2_PKT_1RTT, + wflags, ts); + goto fin; + } else { + conn->pkt.require_padding = + (wflags & NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING); + + if (conn->state == NGTCP2_CS_POST_HANDSHAKE) { + rv = conn_prepare_key_update(conn, ts); + if (rv != 0) { + return rv; + } + } + + if (!conn->pktns.rtb.probe_pkt_left && conn_cwnd_is_zero(conn)) { + destlen = 0; + } else { + if (res == 0) { + nwrite = + conn_write_path_response(conn, path, pi, dest, origdestlen, ts); + if (nwrite) { + goto fin; + } + + if (conn->pv) { + nwrite = + conn_write_path_challenge(conn, path, pi, dest, origdestlen, ts); + if (nwrite) { + goto fin; + } + } + + if (conn->pmtud && + (conn->dcid.current.flags & NGTCP2_DCID_FLAG_PATH_VALIDATED) && + (!conn->hs_pktns || + ngtcp2_ksl_len(&conn->hs_pktns->crypto.tx.frq) == 0)) { + nwrite = conn_write_pmtud_probe(conn, pi, dest, origdestlen, ts); + if (nwrite) { + goto fin; + } + } + } + } + + if (conn->server && + !(conn->dcid.current.flags & NGTCP2_DCID_FLAG_PATH_VALIDATED)) { + server_tx_left = conn_server_tx_left(conn, &conn->dcid.current); + origlen = (size_t)ngtcp2_min((uint64_t)origlen, server_tx_left); + destlen = (size_t)ngtcp2_min((uint64_t)destlen, server_tx_left); + + if (server_tx_left == 0 && + conn->cstat.loss_detection_timer != UINT64_MAX) { + ngtcp2_log_info( + &conn->log, NGTCP2_LOG_EVENT_RCV, + "loss detection timer canceled due to amplification limit"); + conn->cstat.loss_detection_timer = UINT64_MAX; + } + } + } + + if (res == 0) { + if (conn_handshake_remnants_left(conn)) { + if (conn_handshake_probe_left(conn) || + /* Allow exceeding CWND if an Handshake packet needs to be + sent in order to avoid dead lock. In some situation, + typically for client, 1 RTT packets may occupy in-flight + bytes (e.g., some large requests and PMTUD), and + Handshake packet loss shrinks CWND, and we may get in the + situation that we are unable to send Handshake packet. */ + (conn->hs_pktns->rtb.num_pto_eliciting == 0 && + ngtcp2_ksl_len(&conn->hs_pktns->crypto.tx.frq))) { + destlen = origlen; + } + nwrite = conn_write_handshake_pkts(conn, pi, dest, destlen, + /* write_datalen = */ 0, ts); + if (nwrite < 0) { + return nwrite; + } + if (nwrite > 0) { + res = nwrite; + dest += nwrite; + destlen -= (size_t)nwrite; + } else if (destlen == 0) { + res = conn_write_handshake_ack_pkts(conn, pi, dest, origlen, ts); + if (res) { + return res; + } + } + } + } + + if (conn->pktns.rtb.probe_pkt_left) { + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, + "transmit probe pkt left=%zu", + conn->pktns.rtb.probe_pkt_left); + + nwrite = conn_write_pkt(conn, pi, dest, destlen, vmsg, NGTCP2_PKT_1RTT, + wflags, ts); + + goto fin; + } + + nwrite = conn_write_pkt(conn, pi, dest, destlen, vmsg, NGTCP2_PKT_1RTT, + wflags, ts); + if (nwrite) { + assert(nwrite != NGTCP2_ERR_NOBUF); + goto fin; + } + + if (res == 0) { + nwrite = conn_write_ack_pkt(conn, pi, dest, origlen, NGTCP2_PKT_1RTT, ts); + } + +fin: + conn->pkt.hs_spktlen = 0; + + if (nwrite >= 0) { + res += nwrite; + return res; + } + /* NGTCP2_CONN_FLAG_PPE_PENDING is set in conn_write_pkt above. + ppe_pending cannot be used here. */ + if (conn->flags & NGTCP2_CONN_FLAG_PPE_PENDING) { + conn->pkt.hs_spktlen = res; + } + + return nwrite; +} + +static ngtcp2_ssize +conn_write_connection_close(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, + uint8_t *dest, size_t destlen, uint8_t pkt_type, + uint64_t error_code, const uint8_t *reason, + size_t reasonlen, ngtcp2_tstamp ts) { + ngtcp2_pktns *in_pktns = conn->in_pktns; + ngtcp2_pktns *hs_pktns = conn->hs_pktns; + ngtcp2_ssize res = 0, nwrite; + ngtcp2_frame fr; + uint8_t flags = NGTCP2_WRITE_PKT_FLAG_NONE; + + fr.type = NGTCP2_FRAME_CONNECTION_CLOSE; + fr.connection_close.error_code = error_code; + fr.connection_close.frame_type = 0; + fr.connection_close.reasonlen = reasonlen; + fr.connection_close.reason = (uint8_t *)reason; + + if (!(conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_CONFIRMED) && + pkt_type != NGTCP2_PKT_INITIAL) { + if (in_pktns && conn->server) { + nwrite = ngtcp2_conn_write_single_frame_pkt( + conn, pi, dest, destlen, NGTCP2_PKT_INITIAL, + NGTCP2_WRITE_PKT_FLAG_NONE, &conn->dcid.current.cid, &fr, + NGTCP2_RTB_ENTRY_FLAG_NONE, NULL, ts); + if (nwrite < 0) { + return nwrite; + } + + dest += nwrite; + destlen -= (size_t)nwrite; + res += nwrite; + } + + if (pkt_type != NGTCP2_PKT_HANDSHAKE && hs_pktns && + hs_pktns->crypto.tx.ckm) { + nwrite = ngtcp2_conn_write_single_frame_pkt( + conn, pi, dest, destlen, NGTCP2_PKT_HANDSHAKE, + NGTCP2_WRITE_PKT_FLAG_NONE, &conn->dcid.current.cid, &fr, + NGTCP2_RTB_ENTRY_FLAG_NONE, NULL, ts); + if (nwrite < 0) { + return nwrite; + } + + dest += nwrite; + destlen -= (size_t)nwrite; + res += nwrite; + } + } + + if (!conn->server && pkt_type == NGTCP2_PKT_INITIAL) { + flags = NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING; + } + + nwrite = ngtcp2_conn_write_single_frame_pkt( + conn, pi, dest, destlen, pkt_type, flags, &conn->dcid.current.cid, &fr, + NGTCP2_RTB_ENTRY_FLAG_NONE, NULL, ts); + + if (nwrite < 0) { + return nwrite; + } + + res += nwrite; + + if (res == 0) { + return NGTCP2_ERR_NOBUF; + } + + return res; +} + +ngtcp2_ssize ngtcp2_conn_write_connection_close_pkt( + ngtcp2_conn *conn, ngtcp2_path *path, ngtcp2_pkt_info *pi, uint8_t *dest, + size_t destlen, uint64_t error_code, const uint8_t *reason, + size_t reasonlen, ngtcp2_tstamp ts) { + ngtcp2_pktns *in_pktns = conn->in_pktns; + ngtcp2_pktns *hs_pktns = conn->hs_pktns; + uint8_t pkt_type; + ngtcp2_ssize nwrite; + uint64_t server_tx_left; + + conn->log.last_ts = ts; + conn->qlog.last_ts = ts; + + if (conn_check_pkt_num_exhausted(conn)) { + return NGTCP2_ERR_PKT_NUM_EXHAUSTED; + } + + switch (conn->state) { + case NGTCP2_CS_CLIENT_INITIAL: + return NGTCP2_ERR_INVALID_STATE; + case NGTCP2_CS_CLOSING: + case NGTCP2_CS_DRAINING: + return 0; + default: + break; + } + + if (path) { + ngtcp2_path_copy(path, &conn->dcid.current.ps.path); + } + + destlen = conn_shape_udp_payload(conn, &conn->dcid.current, destlen); + + if (pi) { + pi->ecn = NGTCP2_ECN_NOT_ECT; + } + + if (conn->server) { + server_tx_left = conn_server_tx_left(conn, &conn->dcid.current); + destlen = (size_t)ngtcp2_min((uint64_t)destlen, server_tx_left); + } + + if (conn->state == NGTCP2_CS_POST_HANDSHAKE || + (conn->server && conn->pktns.crypto.tx.ckm)) { + pkt_type = NGTCP2_PKT_1RTT; + } else if (hs_pktns && hs_pktns->crypto.tx.ckm) { + pkt_type = NGTCP2_PKT_HANDSHAKE; + } else if (in_pktns && in_pktns->crypto.tx.ckm) { + pkt_type = NGTCP2_PKT_INITIAL; + } else { + /* This branch is taken if server has not read any Initial packet + from client. */ + return NGTCP2_ERR_INVALID_STATE; + } + + nwrite = conn_write_connection_close(conn, pi, dest, destlen, pkt_type, + error_code, reason, reasonlen, ts); + if (nwrite < 0) { + return nwrite; + } + + conn->state = NGTCP2_CS_CLOSING; + + return nwrite; +} + +ngtcp2_ssize ngtcp2_conn_write_application_close_pkt( + ngtcp2_conn *conn, ngtcp2_path *path, ngtcp2_pkt_info *pi, uint8_t *dest, + size_t destlen, uint64_t app_error_code, const uint8_t *reason, + size_t reasonlen, ngtcp2_tstamp ts) { + ngtcp2_ssize nwrite; + ngtcp2_ssize res = 0; + ngtcp2_frame fr; + uint64_t server_tx_left; + + conn->log.last_ts = ts; + conn->qlog.last_ts = ts; + + if (conn_check_pkt_num_exhausted(conn)) { + return NGTCP2_ERR_PKT_NUM_EXHAUSTED; + } + + switch (conn->state) { + case NGTCP2_CS_CLIENT_INITIAL: + return NGTCP2_ERR_INVALID_STATE; + case NGTCP2_CS_CLOSING: + case NGTCP2_CS_DRAINING: + return 0; + default: + break; + } + + if (path) { + ngtcp2_path_copy(path, &conn->dcid.current.ps.path); + } + + destlen = conn_shape_udp_payload(conn, &conn->dcid.current, destlen); + + if (pi) { + pi->ecn = NGTCP2_ECN_NOT_ECT; + } + + if (conn->server) { + server_tx_left = conn_server_tx_left(conn, &conn->dcid.current); + destlen = (size_t)ngtcp2_min((uint64_t)destlen, server_tx_left); + } + + if (!(conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_CONFIRMED)) { + nwrite = conn_write_connection_close(conn, pi, dest, destlen, + conn->hs_pktns->crypto.tx.ckm + ? NGTCP2_PKT_HANDSHAKE + : NGTCP2_PKT_INITIAL, + NGTCP2_APPLICATION_ERROR, NULL, 0, ts); + if (nwrite < 0) { + return nwrite; + } + res = nwrite; + dest += nwrite; + destlen -= (size_t)nwrite; + } + + if (conn->state != NGTCP2_CS_POST_HANDSHAKE) { + assert(res); + + if (!conn->server || !conn->pktns.crypto.tx.ckm) { + return res; + } + } + + assert(conn->pktns.crypto.tx.ckm); + + fr.type = NGTCP2_FRAME_CONNECTION_CLOSE_APP; + fr.connection_close.error_code = app_error_code; + fr.connection_close.frame_type = 0; + fr.connection_close.reasonlen = reasonlen; + fr.connection_close.reason = (uint8_t *)reason; + + nwrite = ngtcp2_conn_write_single_frame_pkt( + conn, pi, dest, destlen, NGTCP2_PKT_1RTT, NGTCP2_WRITE_PKT_FLAG_NONE, + &conn->dcid.current.cid, &fr, NGTCP2_RTB_ENTRY_FLAG_NONE, NULL, ts); + + if (nwrite < 0) { + return nwrite; + } + + res += nwrite; + + if (res == 0) { + return NGTCP2_ERR_NOBUF; + } + + conn->state = NGTCP2_CS_CLOSING; + + return res; +} + +static void +connection_close_error_init(ngtcp2_connection_close_error *ccerr, + ngtcp2_connection_close_error_code_type type, + uint64_t error_code, const uint8_t *reason, + size_t reasonlen) { + ccerr->type = type; + ccerr->error_code = error_code; + ccerr->frame_type = 0; + ccerr->reason = (uint8_t *)reason; + ccerr->reasonlen = reasonlen; +} + +void ngtcp2_connection_close_error_default( + ngtcp2_connection_close_error *ccerr) { + connection_close_error_init(ccerr, + NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT, + NGTCP2_NO_ERROR, NULL, 0); +} + +void ngtcp2_connection_close_error_set_transport_error( + ngtcp2_connection_close_error *ccerr, uint64_t error_code, + const uint8_t *reason, size_t reasonlen) { + connection_close_error_init(ccerr, + NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT, + error_code, reason, reasonlen); +} + +void ngtcp2_connection_close_error_set_transport_error_liberr( + ngtcp2_connection_close_error *ccerr, int liberr, const uint8_t *reason, + size_t reasonlen) { + switch (liberr) { + case NGTCP2_ERR_RECV_VERSION_NEGOTIATION: + connection_close_error_init( + ccerr, + NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT_VERSION_NEGOTIATION, + NGTCP2_NO_ERROR, reason, reasonlen); + + return; + case NGTCP2_ERR_IDLE_CLOSE: + connection_close_error_init( + ccerr, NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT_IDLE_CLOSE, + NGTCP2_NO_ERROR, reason, reasonlen); + + return; + }; + + ngtcp2_connection_close_error_set_transport_error( + ccerr, ngtcp2_err_infer_quic_transport_error_code(liberr), reason, + reasonlen); +} + +void ngtcp2_connection_close_error_set_transport_error_tls_alert( + ngtcp2_connection_close_error *ccerr, uint8_t tls_alert, + const uint8_t *reason, size_t reasonlen) { + ngtcp2_connection_close_error_set_transport_error( + ccerr, NGTCP2_CRYPTO_ERROR | tls_alert, reason, reasonlen); +} + +void ngtcp2_connection_close_error_set_application_error( + ngtcp2_connection_close_error *ccerr, uint64_t error_code, + const uint8_t *reason, size_t reasonlen) { + connection_close_error_init( + ccerr, NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_APPLICATION, error_code, + reason, reasonlen); +} + +ngtcp2_ssize ngtcp2_conn_write_connection_close_versioned( + ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version, + ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, + const ngtcp2_connection_close_error *ccerr, ngtcp2_tstamp ts) { + (void)pkt_info_version; + + switch (ccerr->type) { + case NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT: + return ngtcp2_conn_write_connection_close_pkt( + conn, path, pi, dest, destlen, ccerr->error_code, ccerr->reason, + ccerr->reasonlen, ts); + case NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_APPLICATION: + return ngtcp2_conn_write_application_close_pkt( + conn, path, pi, dest, destlen, ccerr->error_code, ccerr->reason, + ccerr->reasonlen, ts); + default: + return 0; + } +} + +int ngtcp2_conn_is_in_closing_period(ngtcp2_conn *conn) { + return conn->state == NGTCP2_CS_CLOSING; +} + +int ngtcp2_conn_is_in_draining_period(ngtcp2_conn *conn) { + return conn->state == NGTCP2_CS_DRAINING; +} + +int ngtcp2_conn_close_stream(ngtcp2_conn *conn, ngtcp2_strm *strm) { + int rv; + + rv = conn_call_stream_close(conn, strm); + if (rv != 0) { + return rv; + } + + rv = ngtcp2_map_remove(&conn->strms, (ngtcp2_map_key_type)strm->stream_id); + if (rv != 0) { + assert(rv != NGTCP2_ERR_INVALID_ARGUMENT); + return rv; + } + + if (ngtcp2_strm_is_tx_queued(strm)) { + ngtcp2_pq_remove(&conn->tx.strmq, &strm->pe); + if (!ngtcp2_strm_streamfrq_empty(strm)) { + assert(conn->tx.strmq_nretrans); + --conn->tx.strmq_nretrans; + } + } + + ngtcp2_strm_free(strm); + ngtcp2_objalloc_strm_release(&conn->strm_objalloc, strm); + + return 0; +} + +int ngtcp2_conn_close_stream_if_shut_rdwr(ngtcp2_conn *conn, + ngtcp2_strm *strm) { + if ((strm->flags & NGTCP2_STRM_FLAG_SHUT_RDWR) == + NGTCP2_STRM_FLAG_SHUT_RDWR && + ((strm->flags & NGTCP2_STRM_FLAG_RECV_RST) || + ngtcp2_strm_rx_offset(strm) == strm->rx.last_offset) && + (((strm->flags & NGTCP2_STRM_FLAG_SENT_RST) && + (strm->flags & NGTCP2_STRM_FLAG_RST_ACKED)) || + ngtcp2_strm_is_all_tx_data_fin_acked(strm))) { + return ngtcp2_conn_close_stream(conn, strm); + } + return 0; +} + +/* + * conn_shutdown_stream_write closes send stream with error code + * |app_error_code|. RESET_STREAM frame is scheduled. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +static int conn_shutdown_stream_write(ngtcp2_conn *conn, ngtcp2_strm *strm, + uint64_t app_error_code) { + ngtcp2_strm_set_app_error_code(strm, app_error_code); + + if ((strm->flags & NGTCP2_STRM_FLAG_SENT_RST) || + ngtcp2_strm_is_all_tx_data_fin_acked(strm)) { + return 0; + } + + /* Set this flag so that we don't accidentally send DATA to this + stream. */ + strm->flags |= NGTCP2_STRM_FLAG_SHUT_WR | NGTCP2_STRM_FLAG_SENT_RST; + + ngtcp2_strm_streamfrq_clear(strm); + + return conn_reset_stream(conn, strm, app_error_code); +} + +/* + * conn_shutdown_stream_read closes read stream with error code + * |app_error_code|. STOP_SENDING frame is scheduled. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +static int conn_shutdown_stream_read(ngtcp2_conn *conn, ngtcp2_strm *strm, + uint64_t app_error_code) { + ngtcp2_strm_set_app_error_code(strm, app_error_code); + + if (strm->flags & NGTCP2_STRM_FLAG_STOP_SENDING) { + return 0; + } + if ((strm->flags & NGTCP2_STRM_FLAG_SHUT_RD) && + ngtcp2_strm_rx_offset(strm) == strm->rx.last_offset) { + return 0; + } + + /* Extend connection flow control window for the amount of data + which are not passed to application. */ + if (!(strm->flags & + (NGTCP2_STRM_FLAG_STOP_SENDING | NGTCP2_STRM_FLAG_RECV_RST))) { + ngtcp2_conn_extend_max_offset(conn, strm->rx.last_offset - + ngtcp2_strm_rx_offset(strm)); + } + + strm->flags |= NGTCP2_STRM_FLAG_STOP_SENDING; + + return conn_stop_sending(conn, strm, app_error_code); +} + +int ngtcp2_conn_shutdown_stream(ngtcp2_conn *conn, int64_t stream_id, + uint64_t app_error_code) { + int rv; + ngtcp2_strm *strm; + + strm = ngtcp2_conn_find_stream(conn, stream_id); + if (strm == NULL) { + return 0; + } + + if (bidi_stream(stream_id) || !conn_local_stream(conn, stream_id)) { + rv = conn_shutdown_stream_read(conn, strm, app_error_code); + if (rv != 0) { + return rv; + } + } + + if (bidi_stream(stream_id) || conn_local_stream(conn, stream_id)) { + rv = conn_shutdown_stream_write(conn, strm, app_error_code); + if (rv != 0) { + return rv; + } + } + + return 0; +} + +int ngtcp2_conn_shutdown_stream_write(ngtcp2_conn *conn, int64_t stream_id, + uint64_t app_error_code) { + ngtcp2_strm *strm; + + if (!bidi_stream(stream_id) && !conn_local_stream(conn, stream_id)) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + strm = ngtcp2_conn_find_stream(conn, stream_id); + if (strm == NULL) { + return 0; + } + + return conn_shutdown_stream_write(conn, strm, app_error_code); +} + +int ngtcp2_conn_shutdown_stream_read(ngtcp2_conn *conn, int64_t stream_id, + uint64_t app_error_code) { + ngtcp2_strm *strm; + + if (!bidi_stream(stream_id) && conn_local_stream(conn, stream_id)) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + strm = ngtcp2_conn_find_stream(conn, stream_id); + if (strm == NULL) { + return 0; + } + + return conn_shutdown_stream_read(conn, strm, app_error_code); +} + +/* + * conn_extend_max_stream_offset extends stream level flow control + * window by |datalen| of the stream denoted by |strm|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +static int conn_extend_max_stream_offset(ngtcp2_conn *conn, ngtcp2_strm *strm, + uint64_t datalen) { + ngtcp2_strm *top; + + if (datalen > NGTCP2_MAX_VARINT || + strm->rx.unsent_max_offset > NGTCP2_MAX_VARINT - datalen) { + strm->rx.unsent_max_offset = NGTCP2_MAX_VARINT; + } else { + strm->rx.unsent_max_offset += datalen; + } + + if (!(strm->flags & + (NGTCP2_STRM_FLAG_SHUT_RD | NGTCP2_STRM_FLAG_STOP_SENDING)) && + !ngtcp2_strm_is_tx_queued(strm) && + conn_should_send_max_stream_data(conn, strm)) { + if (!ngtcp2_pq_empty(&conn->tx.strmq)) { + top = ngtcp2_conn_tx_strmq_top(conn); + strm->cycle = top->cycle; + } + strm->cycle = conn_tx_strmq_first_cycle(conn); + return ngtcp2_conn_tx_strmq_push(conn, strm); + } + + return 0; +} + +int ngtcp2_conn_extend_max_stream_offset(ngtcp2_conn *conn, int64_t stream_id, + uint64_t datalen) { + ngtcp2_strm *strm; + + strm = ngtcp2_conn_find_stream(conn, stream_id); + if (strm == NULL) { + return 0; + } + + return conn_extend_max_stream_offset(conn, strm, datalen); +} + +void ngtcp2_conn_extend_max_offset(ngtcp2_conn *conn, uint64_t datalen) { + if (NGTCP2_MAX_VARINT < datalen || + conn->rx.unsent_max_offset > NGTCP2_MAX_VARINT - datalen) { + conn->rx.unsent_max_offset = NGTCP2_MAX_VARINT; + return; + } + + conn->rx.unsent_max_offset += datalen; +} + +void ngtcp2_conn_extend_max_streams_bidi(ngtcp2_conn *conn, size_t n) { + handle_max_remote_streams_extension(&conn->remote.bidi.unsent_max_streams, n); +} + +void ngtcp2_conn_extend_max_streams_uni(ngtcp2_conn *conn, size_t n) { + handle_max_remote_streams_extension(&conn->remote.uni.unsent_max_streams, n); +} + +const ngtcp2_cid *ngtcp2_conn_get_dcid(ngtcp2_conn *conn) { + return &conn->dcid.current.cid; +} + +const ngtcp2_cid *ngtcp2_conn_get_client_initial_dcid(ngtcp2_conn *conn) { + return &conn->rcid; +} + +uint32_t ngtcp2_conn_get_client_chosen_version(ngtcp2_conn *conn) { + return conn->client_chosen_version; +} + +uint32_t ngtcp2_conn_get_negotiated_version(ngtcp2_conn *conn) { + return conn->negotiated_version; +} + +static int delete_strms_pq_each(void *data, void *ptr) { + ngtcp2_conn *conn = ptr; + ngtcp2_strm *s = data; + + if (ngtcp2_strm_is_tx_queued(s)) { + ngtcp2_pq_remove(&conn->tx.strmq, &s->pe); + if (!ngtcp2_strm_streamfrq_empty(s)) { + assert(conn->tx.strmq_nretrans); + --conn->tx.strmq_nretrans; + } + } + + ngtcp2_strm_free(s); + ngtcp2_objalloc_strm_release(&conn->strm_objalloc, s); + + return 0; +} + +/* + * conn_discard_early_data_state discards any connection states which + * are altered by any operations during early data transfer. + */ +static void conn_discard_early_data_state(ngtcp2_conn *conn) { + ngtcp2_frame_chain **pfrc, *frc; + + ngtcp2_rtb_remove_early_data(&conn->pktns.rtb, &conn->cstat); + + ngtcp2_map_each_free(&conn->strms, delete_strms_pq_each, conn); + ngtcp2_map_clear(&conn->strms); + + conn->tx.offset = 0; + + conn->rx.unsent_max_offset = conn->rx.max_offset = + conn->local.transport_params.initial_max_data; + + conn->remote.bidi.unsent_max_streams = conn->remote.bidi.max_streams = + conn->local.transport_params.initial_max_streams_bidi; + + conn->remote.uni.unsent_max_streams = conn->remote.uni.max_streams = + conn->local.transport_params.initial_max_streams_uni; + + if (conn->server) { + conn->local.bidi.next_stream_id = 1; + conn->local.uni.next_stream_id = 3; + } else { + conn->local.bidi.next_stream_id = 0; + conn->local.uni.next_stream_id = 2; + } + + for (pfrc = &conn->pktns.tx.frq; *pfrc;) { + frc = *pfrc; + *pfrc = (*pfrc)->next; + ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem); + } +} + +int ngtcp2_conn_early_data_rejected(ngtcp2_conn *conn) { + if (conn->flags & NGTCP2_CONN_FLAG_EARLY_DATA_REJECTED) { + return 0; + } + + conn->flags |= NGTCP2_CONN_FLAG_EARLY_DATA_REJECTED; + + conn_discard_early_data_state(conn); + + if (conn->callbacks.early_data_rejected) { + return conn->callbacks.early_data_rejected(conn, conn->user_data); + } + + return 0; +} + +int ngtcp2_conn_get_early_data_rejected(ngtcp2_conn *conn) { + return (conn->flags & NGTCP2_CONN_FLAG_EARLY_DATA_REJECTED) != 0; +} + +int ngtcp2_conn_update_rtt(ngtcp2_conn *conn, ngtcp2_duration rtt, + ngtcp2_duration ack_delay, ngtcp2_tstamp ts) { + ngtcp2_conn_stat *cstat = &conn->cstat; + + if (cstat->min_rtt == UINT64_MAX) { + cstat->latest_rtt = rtt; + cstat->min_rtt = rtt; + cstat->smoothed_rtt = rtt; + cstat->rttvar = rtt / 2; + cstat->first_rtt_sample_ts = ts; + } else { + if (conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_CONFIRMED) { + assert(conn->remote.transport_params); + + ack_delay = + ngtcp2_min(ack_delay, conn->remote.transport_params->max_ack_delay); + } else if (ack_delay > 0 && rtt >= cstat->min_rtt && + rtt < cstat->min_rtt + ack_delay) { + /* Ignore RTT sample if adjusting ack_delay causes the sample + less than min_rtt before handshake confirmation. */ + ngtcp2_log_info( + &conn->log, NGTCP2_LOG_EVENT_RCV, + "ignore rtt sample because ack_delay is too large latest_rtt=%" PRIu64 + " min_rtt=%" PRIu64 " ack_delay=%" PRIu64, + rtt / NGTCP2_MILLISECONDS, cstat->min_rtt / NGTCP2_MILLISECONDS, + ack_delay / NGTCP2_MILLISECONDS); + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + cstat->latest_rtt = rtt; + cstat->min_rtt = ngtcp2_min(cstat->min_rtt, rtt); + + if (rtt >= cstat->min_rtt + ack_delay) { + rtt -= ack_delay; + } + + cstat->rttvar = (cstat->rttvar * 3 + (cstat->smoothed_rtt < rtt + ? rtt - cstat->smoothed_rtt + : cstat->smoothed_rtt - rtt)) / + 4; + cstat->smoothed_rtt = (cstat->smoothed_rtt * 7 + rtt) / 8; + } + + ngtcp2_log_info( + &conn->log, NGTCP2_LOG_EVENT_RCV, + "latest_rtt=%" PRIu64 " min_rtt=%" PRIu64 " smoothed_rtt=%" PRIu64 + " rttvar=%" PRIu64 " ack_delay=%" PRIu64, + cstat->latest_rtt / NGTCP2_MILLISECONDS, + cstat->min_rtt / NGTCP2_MILLISECONDS, + cstat->smoothed_rtt / NGTCP2_MILLISECONDS, + cstat->rttvar / NGTCP2_MILLISECONDS, ack_delay / NGTCP2_MILLISECONDS); + + return 0; +} + +void ngtcp2_conn_get_conn_stat_versioned(ngtcp2_conn *conn, + int conn_stat_version, + ngtcp2_conn_stat *cstat) { + (void)conn_stat_version; + + *cstat = conn->cstat; +} + +static void conn_get_loss_time_and_pktns(ngtcp2_conn *conn, + ngtcp2_tstamp *ploss_time, + ngtcp2_pktns **ppktns) { + ngtcp2_pktns *const ns[] = {conn->hs_pktns, &conn->pktns}; + ngtcp2_conn_stat *cstat = &conn->cstat; + ngtcp2_duration *loss_time = cstat->loss_time + 1; + ngtcp2_tstamp earliest_loss_time = cstat->loss_time[NGTCP2_PKTNS_ID_INITIAL]; + ngtcp2_pktns *pktns = conn->in_pktns; + size_t i; + + for (i = 0; i < ngtcp2_arraylen(ns); ++i) { + if (ns[i] == NULL || loss_time[i] >= earliest_loss_time) { + continue; + } + + earliest_loss_time = loss_time[i]; + pktns = ns[i]; + } + + if (ploss_time) { + *ploss_time = earliest_loss_time; + } + if (ppktns) { + *ppktns = pktns; + } +} + +static ngtcp2_tstamp conn_get_earliest_pto_expiry(ngtcp2_conn *conn, + ngtcp2_tstamp ts) { + ngtcp2_pktns *ns[] = {conn->in_pktns, conn->hs_pktns, &conn->pktns}; + size_t i; + ngtcp2_tstamp earliest_ts = UINT64_MAX, t; + ngtcp2_conn_stat *cstat = &conn->cstat; + ngtcp2_tstamp *times = cstat->last_tx_pkt_ts; + ngtcp2_duration duration = + compute_pto(cstat->smoothed_rtt, cstat->rttvar, /* max_ack_delay = */ 0) * + (1ULL << cstat->pto_count); + + for (i = NGTCP2_PKTNS_ID_INITIAL; i < NGTCP2_PKTNS_ID_MAX; ++i) { + if (ns[i] == NULL || ns[i]->rtb.num_pto_eliciting == 0 || + (times[i] == UINT64_MAX || + (i == NGTCP2_PKTNS_ID_APPLICATION && + !(conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_CONFIRMED)))) { + continue; + } + + t = times[i] + duration; + + if (i == NGTCP2_PKTNS_ID_APPLICATION) { + assert(conn->remote.transport_params); + t += conn->remote.transport_params->max_ack_delay * + (1ULL << cstat->pto_count); + } + + if (t < earliest_ts) { + earliest_ts = t; + } + } + + if (earliest_ts == UINT64_MAX) { + return ts + duration; + } + + return earliest_ts; +} + +void ngtcp2_conn_set_loss_detection_timer(ngtcp2_conn *conn, ngtcp2_tstamp ts) { + ngtcp2_conn_stat *cstat = &conn->cstat; + ngtcp2_duration timeout; + ngtcp2_pktns *in_pktns = conn->in_pktns; + ngtcp2_pktns *hs_pktns = conn->hs_pktns; + ngtcp2_pktns *pktns = &conn->pktns; + ngtcp2_tstamp earliest_loss_time; + + conn_get_loss_time_and_pktns(conn, &earliest_loss_time, NULL); + + if (earliest_loss_time != UINT64_MAX) { + cstat->loss_detection_timer = earliest_loss_time; + + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_RCV, + "loss_detection_timer=%" PRIu64 " nonzero crypto loss time", + cstat->loss_detection_timer); + return; + } + + if ((!in_pktns || in_pktns->rtb.num_pto_eliciting == 0) && + (!hs_pktns || hs_pktns->rtb.num_pto_eliciting == 0) && + (pktns->rtb.num_pto_eliciting == 0 || + !(conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_CONFIRMED)) && + (conn->server || + (conn->flags & (NGTCP2_CONN_FLAG_SERVER_ADDR_VERIFIED | + NGTCP2_CONN_FLAG_HANDSHAKE_CONFIRMED)))) { + if (cstat->loss_detection_timer != UINT64_MAX) { + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_RCV, + "loss detection timer canceled"); + cstat->loss_detection_timer = UINT64_MAX; + cstat->pto_count = 0; + } + return; + } + + cstat->loss_detection_timer = conn_get_earliest_pto_expiry(conn, ts); + + timeout = + cstat->loss_detection_timer > ts ? cstat->loss_detection_timer - ts : 0; + + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_RCV, + "loss_detection_timer=%" PRIu64 " timeout=%" PRIu64, + cstat->loss_detection_timer, timeout / NGTCP2_MILLISECONDS); +} + +int ngtcp2_conn_on_loss_detection_timer(ngtcp2_conn *conn, ngtcp2_tstamp ts) { + ngtcp2_conn_stat *cstat = &conn->cstat; + int rv; + ngtcp2_pktns *in_pktns = conn->in_pktns; + ngtcp2_pktns *hs_pktns = conn->hs_pktns; + ngtcp2_tstamp earliest_loss_time; + ngtcp2_pktns *loss_pktns = NULL; + + conn->log.last_ts = ts; + conn->qlog.last_ts = ts; + + switch (conn->state) { + case NGTCP2_CS_CLOSING: + case NGTCP2_CS_DRAINING: + cstat->loss_detection_timer = UINT64_MAX; + cstat->pto_count = 0; + return 0; + default: + break; + } + + if (cstat->loss_detection_timer == UINT64_MAX) { + return 0; + } + + conn_get_loss_time_and_pktns(conn, &earliest_loss_time, &loss_pktns); + + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_RCV, + "loss detection timer fired"); + + if (earliest_loss_time != UINT64_MAX) { + assert(loss_pktns); + + rv = ngtcp2_conn_detect_lost_pkt(conn, loss_pktns, cstat, ts); + if (rv != 0) { + return rv; + } + ngtcp2_conn_set_loss_detection_timer(conn, ts); + return 0; + } + + if (!conn->server && !conn_is_handshake_completed(conn)) { + if (hs_pktns->crypto.tx.ckm) { + hs_pktns->rtb.probe_pkt_left = 1; + } else { + in_pktns->rtb.probe_pkt_left = 1; + } + } else { + if (in_pktns && in_pktns->rtb.num_pto_eliciting) { + in_pktns->rtb.probe_pkt_left = 1; + + assert(hs_pktns); + + if (conn->server && hs_pktns->rtb.num_pto_eliciting) { + /* let server coalesce packets */ + hs_pktns->rtb.probe_pkt_left = 1; + } + } else if (hs_pktns && hs_pktns->rtb.num_pto_eliciting) { + hs_pktns->rtb.probe_pkt_left = 1; + } else { + conn->pktns.rtb.probe_pkt_left = 2; + } + } + + ++cstat->pto_count; + + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_RCV, "pto_count=%zu", + cstat->pto_count); + + ngtcp2_conn_set_loss_detection_timer(conn, ts); + + return 0; +} + +static int conn_buffer_crypto_data(ngtcp2_conn *conn, const uint8_t **pdata, + ngtcp2_pktns *pktns, const uint8_t *data, + size_t datalen) { + int rv; + ngtcp2_buf_chain **pbufchain = &pktns->crypto.tx.data; + + if (*pbufchain) { + for (; (*pbufchain)->next; pbufchain = &(*pbufchain)->next) + ; + + if (ngtcp2_buf_left(&(*pbufchain)->buf) < datalen) { + pbufchain = &(*pbufchain)->next; + } + } + + if (!*pbufchain) { + rv = ngtcp2_buf_chain_new(pbufchain, ngtcp2_max(1024, datalen), conn->mem); + if (rv != 0) { + return rv; + } + } + + *pdata = (*pbufchain)->buf.last; + (*pbufchain)->buf.last = ngtcp2_cpymem((*pbufchain)->buf.last, data, datalen); + + return 0; +} + +int ngtcp2_conn_submit_crypto_data(ngtcp2_conn *conn, + ngtcp2_crypto_level crypto_level, + const uint8_t *data, const size_t datalen) { + ngtcp2_pktns *pktns; + ngtcp2_frame_chain *frc; + ngtcp2_crypto *fr; + int rv; + + if (datalen == 0) { + return 0; + } + + switch (crypto_level) { + case NGTCP2_CRYPTO_LEVEL_INITIAL: + assert(conn->in_pktns); + pktns = conn->in_pktns; + break; + case NGTCP2_CRYPTO_LEVEL_HANDSHAKE: + assert(conn->hs_pktns); + pktns = conn->hs_pktns; + break; + case NGTCP2_CRYPTO_LEVEL_APPLICATION: + pktns = &conn->pktns; + break; + default: + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + rv = conn_buffer_crypto_data(conn, &data, pktns, data, datalen); + if (rv != 0) { + return rv; + } + + rv = ngtcp2_frame_chain_objalloc_new(&frc, &conn->frc_objalloc); + if (rv != 0) { + return rv; + } + + fr = &frc->fr.crypto; + + fr->type = NGTCP2_FRAME_CRYPTO; + fr->offset = pktns->crypto.tx.offset; + fr->datacnt = 1; + fr->data[0].len = datalen; + fr->data[0].base = (uint8_t *)data; + + rv = ngtcp2_ksl_insert(&pktns->crypto.tx.frq, NULL, &fr->offset, frc); + if (rv != 0) { + ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem); + return rv; + } + + pktns->crypto.strm.tx.offset += datalen; + pktns->crypto.tx.offset += datalen; + + return 0; +} + +int ngtcp2_conn_submit_new_token(ngtcp2_conn *conn, const uint8_t *token, + size_t tokenlen) { + int rv; + ngtcp2_frame_chain *nfrc; + + assert(conn->server); + assert(token); + assert(tokenlen); + + rv = ngtcp2_frame_chain_new_token_objalloc_new( + &nfrc, token, tokenlen, &conn->frc_objalloc, conn->mem); + if (rv != 0) { + return rv; + } + + nfrc->next = conn->pktns.tx.frq; + conn->pktns.tx.frq = nfrc; + + return 0; +} + +ngtcp2_strm *ngtcp2_conn_tx_strmq_top(ngtcp2_conn *conn) { + assert(!ngtcp2_pq_empty(&conn->tx.strmq)); + return ngtcp2_struct_of(ngtcp2_pq_top(&conn->tx.strmq), ngtcp2_strm, pe); +} + +void ngtcp2_conn_tx_strmq_pop(ngtcp2_conn *conn) { + ngtcp2_strm *strm = ngtcp2_conn_tx_strmq_top(conn); + assert(strm); + ngtcp2_pq_pop(&conn->tx.strmq); + strm->pe.index = NGTCP2_PQ_BAD_INDEX; +} + +int ngtcp2_conn_tx_strmq_push(ngtcp2_conn *conn, ngtcp2_strm *strm) { + return ngtcp2_pq_push(&conn->tx.strmq, &strm->pe); +} + +static int conn_has_uncommited_preferred_address_cid(ngtcp2_conn *conn) { + return conn->server && + !(conn->flags & NGTCP2_CONN_FLAG_LOCAL_TRANSPORT_PARAMS_COMMITTED) && + conn->oscid.datalen && + conn->local.transport_params.preferred_address_present; +} + +size_t ngtcp2_conn_get_num_scid(ngtcp2_conn *conn) { + return ngtcp2_ksl_len(&conn->scid.set) + + (size_t)conn_has_uncommited_preferred_address_cid(conn); +} + +size_t ngtcp2_conn_get_scid(ngtcp2_conn *conn, ngtcp2_cid *dest) { + ngtcp2_cid *origdest = dest; + ngtcp2_ksl_it it; + ngtcp2_scid *scid; + + for (it = ngtcp2_ksl_begin(&conn->scid.set); !ngtcp2_ksl_it_end(&it); + ngtcp2_ksl_it_next(&it)) { + scid = ngtcp2_ksl_it_get(&it); + *dest++ = scid->cid; + } + + if (conn_has_uncommited_preferred_address_cid(conn)) { + *dest++ = conn->local.transport_params.preferred_address.cid; + } + + return (size_t)(dest - origdest); +} + +size_t ngtcp2_conn_get_num_active_dcid(ngtcp2_conn *conn) { + size_t n = 1; /* for conn->dcid.current */ + ngtcp2_pv *pv = conn->pv; + + if (!(conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_COMPLETED_HANDLED)) { + return 0; + } + + if (pv) { + if (pv->dcid.seq != conn->dcid.current.seq) { + ++n; + } + if ((pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE) && + pv->fallback_dcid.seq != conn->dcid.current.seq && + pv->fallback_dcid.seq != pv->dcid.seq) { + ++n; + } + } + + n += ngtcp2_ringbuf_len(&conn->dcid.retired.rb); + + return n; +} + +static void copy_dcid_to_cid_token(ngtcp2_cid_token *dest, + const ngtcp2_dcid *src) { + dest->seq = src->seq; + dest->cid = src->cid; + ngtcp2_path_storage_init2(&dest->ps, &src->ps.path); + if ((dest->token_present = + (src->flags & NGTCP2_DCID_FLAG_TOKEN_PRESENT) != 0)) { + memcpy(dest->token, src->token, NGTCP2_STATELESS_RESET_TOKENLEN); + } +} + +size_t ngtcp2_conn_get_active_dcid(ngtcp2_conn *conn, ngtcp2_cid_token *dest) { + ngtcp2_pv *pv = conn->pv; + ngtcp2_cid_token *orig = dest; + ngtcp2_dcid *dcid; + size_t len, i; + + if (!(conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_COMPLETED_HANDLED)) { + return 0; + } + + copy_dcid_to_cid_token(dest, &conn->dcid.current); + ++dest; + + if (pv) { + if (pv->dcid.seq != conn->dcid.current.seq) { + copy_dcid_to_cid_token(dest, &pv->dcid); + ++dest; + } + if ((pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE) && + pv->fallback_dcid.seq != conn->dcid.current.seq && + pv->fallback_dcid.seq != pv->dcid.seq) { + copy_dcid_to_cid_token(dest, &pv->fallback_dcid); + ++dest; + } + } + + len = ngtcp2_ringbuf_len(&conn->dcid.retired.rb); + for (i = 0; i < len; ++i) { + dcid = ngtcp2_ringbuf_get(&conn->dcid.retired.rb, i); + copy_dcid_to_cid_token(dest, dcid); + ++dest; + } + + return (size_t)(dest - orig); +} + +void ngtcp2_conn_set_local_addr(ngtcp2_conn *conn, const ngtcp2_addr *addr) { + ngtcp2_addr *dest = &conn->dcid.current.ps.path.local; + + assert(addr->addrlen <= + (ngtcp2_socklen)sizeof(conn->dcid.current.ps.local_addrbuf)); + ngtcp2_addr_copy(dest, addr); +} + +void ngtcp2_conn_set_path_user_data(ngtcp2_conn *conn, void *path_user_data) { + conn->dcid.current.ps.path.user_data = path_user_data; +} + +const ngtcp2_path *ngtcp2_conn_get_path(ngtcp2_conn *conn) { + return &conn->dcid.current.ps.path; +} + +size_t ngtcp2_conn_get_max_tx_udp_payload_size(ngtcp2_conn *conn) { + return conn->local.settings.max_tx_udp_payload_size; +} + +size_t ngtcp2_conn_get_path_max_tx_udp_payload_size(ngtcp2_conn *conn) { + if (conn->local.settings.no_tx_udp_payload_size_shaping) { + return ngtcp2_conn_get_max_tx_udp_payload_size(conn); + } + + return conn->dcid.current.max_udp_payload_size; +} + +static int conn_initiate_migration_precheck(ngtcp2_conn *conn, + const ngtcp2_addr *local_addr) { + if (!(conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_CONFIRMED) || + conn->remote.transport_params->disable_active_migration || + conn->dcid.current.cid.datalen == 0 || + (conn->pv && (conn->pv->flags & NGTCP2_PV_FLAG_PREFERRED_ADDR))) { + return NGTCP2_ERR_INVALID_STATE; + } + + if (ngtcp2_ringbuf_len(&conn->dcid.unused.rb) == 0) { + return NGTCP2_ERR_CONN_ID_BLOCKED; + } + + if (ngtcp2_addr_eq(&conn->dcid.current.ps.path.local, local_addr)) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + return 0; +} + +int ngtcp2_conn_initiate_immediate_migration(ngtcp2_conn *conn, + const ngtcp2_path *path, + ngtcp2_tstamp ts) { + int rv; + ngtcp2_dcid *dcid; + ngtcp2_duration pto, initial_pto, timeout; + ngtcp2_pv *pv; + + assert(!conn->server); + + conn->log.last_ts = ts; + conn->qlog.last_ts = ts; + + rv = conn_initiate_migration_precheck(conn, &path->local); + if (rv != 0) { + return rv; + } + + ngtcp2_conn_stop_pmtud(conn); + + if (conn->pv) { + rv = conn_abort_pv(conn, ts); + if (rv != 0) { + return rv; + } + } + + rv = conn_retire_dcid(conn, &conn->dcid.current, ts); + if (rv != 0) { + return rv; + } + + dcid = ngtcp2_ringbuf_get(&conn->dcid.unused.rb, 0); + ngtcp2_dcid_set_path(dcid, path); + + ngtcp2_dcid_copy(&conn->dcid.current, dcid); + ngtcp2_ringbuf_pop_front(&conn->dcid.unused.rb); + + conn_reset_congestion_state(conn, ts); + conn_reset_ecn_validation_state(conn); + + pto = conn_compute_pto(conn, &conn->pktns); + initial_pto = conn_compute_initial_pto(conn, &conn->pktns); + timeout = 3 * ngtcp2_max(pto, initial_pto); + + /* TODO It might be better to add a new flag which indicates that a + connection should be closed if this path validation failed. The + current design allows an application to continue, by migrating + into yet another path. */ + rv = ngtcp2_pv_new(&pv, dcid, timeout, NGTCP2_PV_FLAG_NONE, &conn->log, + conn->mem); + if (rv != 0) { + return rv; + } + + conn->pv = pv; + + return conn_call_activate_dcid(conn, &conn->dcid.current); +} + +int ngtcp2_conn_initiate_migration(ngtcp2_conn *conn, const ngtcp2_path *path, + ngtcp2_tstamp ts) { + int rv; + ngtcp2_dcid *dcid; + ngtcp2_duration pto, initial_pto, timeout; + ngtcp2_pv *pv; + + assert(!conn->server); + + conn->log.last_ts = ts; + conn->qlog.last_ts = ts; + + rv = conn_initiate_migration_precheck(conn, &path->local); + if (rv != 0) { + return rv; + } + + if (conn->pv) { + rv = conn_abort_pv(conn, ts); + if (rv != 0) { + return rv; + } + } + + dcid = ngtcp2_ringbuf_get(&conn->dcid.unused.rb, 0); + ngtcp2_dcid_set_path(dcid, path); + + pto = conn_compute_pto(conn, &conn->pktns); + initial_pto = conn_compute_initial_pto(conn, &conn->pktns); + timeout = 3 * ngtcp2_max(pto, initial_pto); + + rv = ngtcp2_pv_new(&pv, dcid, timeout, NGTCP2_PV_FLAG_NONE, &conn->log, + conn->mem); + if (rv != 0) { + return rv; + } + + ngtcp2_ringbuf_pop_front(&conn->dcid.unused.rb); + conn->pv = pv; + + return conn_call_activate_dcid(conn, &pv->dcid); +} + +uint64_t ngtcp2_conn_get_max_local_streams_uni(ngtcp2_conn *conn) { + return conn->local.uni.max_streams; +} + +uint64_t ngtcp2_conn_get_max_data_left(ngtcp2_conn *conn) { + return conn->tx.max_offset - conn->tx.offset; +} + +uint64_t ngtcp2_conn_get_max_stream_data_left(ngtcp2_conn *conn, + int64_t stream_id) { + ngtcp2_strm *strm = ngtcp2_conn_find_stream(conn, stream_id); + + if (strm == NULL) { + return 0; + } + + return strm->tx.max_offset - strm->tx.offset; +} + +uint64_t ngtcp2_conn_get_streams_bidi_left(ngtcp2_conn *conn) { + uint64_t n = ngtcp2_ord_stream_id(conn->local.bidi.next_stream_id); + + return n > conn->local.bidi.max_streams + ? 0 + : conn->local.bidi.max_streams - n + 1; +} + +uint64_t ngtcp2_conn_get_streams_uni_left(ngtcp2_conn *conn) { + uint64_t n = ngtcp2_ord_stream_id(conn->local.uni.next_stream_id); + + return n > conn->local.uni.max_streams ? 0 + : conn->local.uni.max_streams - n + 1; +} + +uint64_t ngtcp2_conn_get_cwnd_left(ngtcp2_conn *conn) { + uint64_t bytes_in_flight = conn->cstat.bytes_in_flight; + uint64_t cwnd = conn_get_cwnd(conn); + + if (cwnd > bytes_in_flight) { + return cwnd - bytes_in_flight; + } + + return 0; +} + +ngtcp2_tstamp ngtcp2_conn_get_idle_expiry(ngtcp2_conn *conn) { + ngtcp2_duration trpto; + ngtcp2_duration idle_timeout; + + /* TODO Remote max_idle_timeout becomes effective after handshake + completion. */ + + if (!conn_is_handshake_completed(conn) || + conn->remote.transport_params->max_idle_timeout == 0 || + (conn->local.transport_params.max_idle_timeout && + conn->local.transport_params.max_idle_timeout < + conn->remote.transport_params->max_idle_timeout)) { + idle_timeout = conn->local.transport_params.max_idle_timeout; + } else { + idle_timeout = conn->remote.transport_params->max_idle_timeout; + } + + if (idle_timeout == 0) { + return UINT64_MAX; + } + + trpto = 3 * conn_compute_pto(conn, conn_is_handshake_completed(conn) + ? &conn->pktns + : conn->hs_pktns); + + return conn->idle_ts + ngtcp2_max(idle_timeout, trpto); +} + +ngtcp2_duration ngtcp2_conn_get_pto(ngtcp2_conn *conn) { + return conn_compute_pto( + conn, conn_is_handshake_completed(conn) ? &conn->pktns : conn->hs_pktns); +} + +void ngtcp2_conn_set_initial_crypto_ctx(ngtcp2_conn *conn, + const ngtcp2_crypto_ctx *ctx) { + assert(conn->in_pktns); + conn->in_pktns->crypto.ctx = *ctx; +} + +const ngtcp2_crypto_ctx *ngtcp2_conn_get_initial_crypto_ctx(ngtcp2_conn *conn) { + assert(conn->in_pktns); + return &conn->in_pktns->crypto.ctx; +} + +void ngtcp2_conn_set_retry_aead(ngtcp2_conn *conn, + const ngtcp2_crypto_aead *aead, + const ngtcp2_crypto_aead_ctx *aead_ctx) { + assert(!conn->crypto.retry_aead_ctx.native_handle); + + conn->crypto.retry_aead = *aead; + conn->crypto.retry_aead_ctx = *aead_ctx; +} + +void ngtcp2_conn_set_crypto_ctx(ngtcp2_conn *conn, + const ngtcp2_crypto_ctx *ctx) { + assert(conn->hs_pktns); + conn->hs_pktns->crypto.ctx = *ctx; + conn->pktns.crypto.ctx = *ctx; +} + +const ngtcp2_crypto_ctx *ngtcp2_conn_get_crypto_ctx(ngtcp2_conn *conn) { + return &conn->pktns.crypto.ctx; +} + +void ngtcp2_conn_set_early_crypto_ctx(ngtcp2_conn *conn, + const ngtcp2_crypto_ctx *ctx) { + conn->early.ctx = *ctx; +} + +const ngtcp2_crypto_ctx *ngtcp2_conn_get_early_crypto_ctx(ngtcp2_conn *conn) { + return &conn->early.ctx; +} + +void *ngtcp2_conn_get_tls_native_handle(ngtcp2_conn *conn) { + return conn->crypto.tls_native_handle; +} + +void ngtcp2_conn_set_tls_native_handle(ngtcp2_conn *conn, + void *tls_native_handle) { + conn->crypto.tls_native_handle = tls_native_handle; +} + +void ngtcp2_conn_get_connection_close_error( + ngtcp2_conn *conn, ngtcp2_connection_close_error *ccerr) { + *ccerr = conn->rx.ccerr; +} + +void ngtcp2_conn_set_tls_error(ngtcp2_conn *conn, int liberr) { + conn->crypto.tls_error = liberr; +} + +int ngtcp2_conn_get_tls_error(ngtcp2_conn *conn) { + return conn->crypto.tls_error; +} + +void ngtcp2_conn_set_tls_alert(ngtcp2_conn *conn, uint8_t alert) { + conn->crypto.tls_alert = alert; +} + +uint8_t ngtcp2_conn_get_tls_alert(ngtcp2_conn *conn) { + return conn->crypto.tls_alert; +} + +int ngtcp2_conn_is_local_stream(ngtcp2_conn *conn, int64_t stream_id) { + return conn_local_stream(conn, stream_id); +} + +int ngtcp2_conn_is_server(ngtcp2_conn *conn) { return conn->server; } + +int ngtcp2_conn_after_retry(ngtcp2_conn *conn) { + return (conn->flags & NGTCP2_CONN_FLAG_RECV_RETRY) != 0; +} + +int ngtcp2_conn_set_stream_user_data(ngtcp2_conn *conn, int64_t stream_id, + void *stream_user_data) { + ngtcp2_strm *strm = ngtcp2_conn_find_stream(conn, stream_id); + + if (strm == NULL) { + return NGTCP2_ERR_STREAM_NOT_FOUND; + } + + strm->stream_user_data = stream_user_data; + + return 0; +} + +void ngtcp2_conn_update_pkt_tx_time(ngtcp2_conn *conn, ngtcp2_tstamp ts) { + double pacing_rate; + ngtcp2_duration interval; + + if (conn->tx.pacing.pktlen == 0) { + return; + } + + if (conn->cstat.pacing_rate > 0) { + pacing_rate = conn->cstat.pacing_rate; + } else { + /* 1.25 is the under-utilization avoidance factor described in + https://datatracker.ietf.org/doc/html/rfc9002#section-7.7 */ + pacing_rate = + (double)conn->cstat.cwnd / (double)conn->cstat.smoothed_rtt * 1.25; + } + + interval = (ngtcp2_duration)((double)conn->tx.pacing.pktlen / pacing_rate); + + conn->tx.pacing.next_ts = ts + interval; + conn->tx.pacing.pktlen = 0; +} + +size_t ngtcp2_conn_get_send_quantum(ngtcp2_conn *conn) { + return conn->cstat.send_quantum; +} + +int ngtcp2_conn_track_retired_dcid_seq(ngtcp2_conn *conn, uint64_t seq) { + size_t i; + + if (conn->dcid.retire_unacked.len >= + ngtcp2_arraylen(conn->dcid.retire_unacked.seqs)) { + return NGTCP2_ERR_CONNECTION_ID_LIMIT; + } + + /* Make sure that we do not have a duplicate */ + for (i = 0; i < conn->dcid.retire_unacked.len; ++i) { + if (conn->dcid.retire_unacked.seqs[i] == seq) { + ngtcp2_unreachable(); + } + } + + conn->dcid.retire_unacked.seqs[conn->dcid.retire_unacked.len++] = seq; + + return 0; +} + +void ngtcp2_conn_untrack_retired_dcid_seq(ngtcp2_conn *conn, uint64_t seq) { + size_t i; + + for (i = 0; i < conn->dcid.retire_unacked.len; ++i) { + if (conn->dcid.retire_unacked.seqs[i] != seq) { + continue; + } + + if (i != conn->dcid.retire_unacked.len - 1) { + conn->dcid.retire_unacked.seqs[i] = + conn->dcid.retire_unacked.seqs[conn->dcid.retire_unacked.len - 1]; + } + + --conn->dcid.retire_unacked.len; + + return; + } +} + +size_t ngtcp2_conn_get_stream_loss_count(ngtcp2_conn *conn, int64_t stream_id) { + ngtcp2_strm *strm = ngtcp2_conn_find_stream(conn, stream_id); + + if (strm == NULL) { + return 0; + } + + return strm->tx.loss_count; +} + +void ngtcp2_path_challenge_entry_init(ngtcp2_path_challenge_entry *pcent, + const ngtcp2_path *path, + const uint8_t *data) { + ngtcp2_path_storage_init2(&pcent->ps, path); + memcpy(pcent->data, data, sizeof(pcent->data)); +} + +void ngtcp2_settings_default_versioned(int settings_version, + ngtcp2_settings *settings) { + (void)settings_version; + + memset(settings, 0, sizeof(*settings)); + settings->cc_algo = NGTCP2_CC_ALGO_CUBIC; + settings->initial_rtt = NGTCP2_DEFAULT_INITIAL_RTT; + settings->ack_thresh = 2; + settings->max_tx_udp_payload_size = 1500 - 48; + settings->handshake_timeout = NGTCP2_DEFAULT_HANDSHAKE_TIMEOUT; +} + +void ngtcp2_transport_params_default_versioned( + int transport_params_version, ngtcp2_transport_params *params) { + (void)transport_params_version; + + memset(params, 0, sizeof(*params)); + params->max_udp_payload_size = NGTCP2_DEFAULT_MAX_RECV_UDP_PAYLOAD_SIZE; + params->ack_delay_exponent = NGTCP2_DEFAULT_ACK_DELAY_EXPONENT; + params->max_ack_delay = NGTCP2_DEFAULT_MAX_ACK_DELAY; + params->active_connection_id_limit = + NGTCP2_DEFAULT_ACTIVE_CONNECTION_ID_LIMIT; +} + +/* The functions prefixed with ngtcp2_pkt_ are usually put inside + ngtcp2_pkt.c. This function uses encryption construct and uses + test data defined only in ngtcp2_conn_test.c, so it is written + here. */ +ngtcp2_ssize ngtcp2_pkt_write_connection_close( + uint8_t *dest, size_t destlen, uint32_t version, const ngtcp2_cid *dcid, + const ngtcp2_cid *scid, uint64_t error_code, const uint8_t *reason, + size_t reasonlen, ngtcp2_encrypt encrypt, const ngtcp2_crypto_aead *aead, + const ngtcp2_crypto_aead_ctx *aead_ctx, const uint8_t *iv, + ngtcp2_hp_mask hp_mask, const ngtcp2_crypto_cipher *hp, + const ngtcp2_crypto_cipher_ctx *hp_ctx) { + ngtcp2_pkt_hd hd; + ngtcp2_crypto_km ckm; + ngtcp2_crypto_cc cc; + ngtcp2_ppe ppe; + ngtcp2_frame fr = {0}; + int rv; + + ngtcp2_pkt_hd_init(&hd, NGTCP2_PKT_FLAG_LONG_FORM, NGTCP2_PKT_INITIAL, dcid, + scid, /* pkt_num = */ 0, /* pkt_numlen = */ 1, version, + /* len = */ 0); + + ngtcp2_vec_init(&ckm.secret, NULL, 0); + ngtcp2_vec_init(&ckm.iv, iv, 12); + ckm.aead_ctx = *aead_ctx; + ckm.pkt_num = 0; + ckm.flags = NGTCP2_CRYPTO_KM_FLAG_NONE; + + cc.aead = *aead; + cc.hp = *hp; + cc.ckm = &ckm; + cc.hp_ctx = *hp_ctx; + cc.encrypt = encrypt; + cc.hp_mask = hp_mask; + + ngtcp2_ppe_init(&ppe, dest, destlen, &cc); + + rv = ngtcp2_ppe_encode_hd(&ppe, &hd); + if (rv != 0) { + assert(NGTCP2_ERR_NOBUF == rv); + return rv; + } + + if (!ngtcp2_ppe_ensure_hp_sample(&ppe)) { + return NGTCP2_ERR_NOBUF; + } + + fr.type = NGTCP2_FRAME_CONNECTION_CLOSE; + fr.connection_close.error_code = error_code; + fr.connection_close.reasonlen = reasonlen; + fr.connection_close.reason = (uint8_t *)reason; + + rv = ngtcp2_ppe_encode_frame(&ppe, &fr); + if (rv != 0) { + assert(NGTCP2_ERR_NOBUF == rv); + return rv; + } + + return ngtcp2_ppe_final(&ppe, NULL); +} + +int ngtcp2_is_bidi_stream(int64_t stream_id) { return bidi_stream(stream_id); } + +uint32_t ngtcp2_select_version(const uint32_t *preferred_versions, + size_t preferred_versionslen, + const uint32_t *offered_versions, + size_t offered_versionslen) { + size_t i, j; + + if (!preferred_versionslen || !offered_versionslen) { + return 0; + } + + for (i = 0; i < preferred_versionslen; ++i) { + assert(ngtcp2_is_supported_version(preferred_versions[i])); + + for (j = 0; j < offered_versionslen; ++j) { + if (preferred_versions[i] == offered_versions[j]) { + return preferred_versions[i]; + } + } + } + + return 0; +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_conn.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_conn.h new file mode 100644 index 0000000..aace7d9 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_conn.h @@ -0,0 +1,1115 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_CONN_H +#define NGTCP2_CONN_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +#include "ngtcp2_mem.h" +#include "ngtcp2_crypto.h" +#include "ngtcp2_acktr.h" +#include "ngtcp2_rtb.h" +#include "ngtcp2_strm.h" +#include "ngtcp2_idtr.h" +#include "ngtcp2_str.h" +#include "ngtcp2_pkt.h" +#include "ngtcp2_log.h" +#include "ngtcp2_pq.h" +#include "ngtcp2_cc.h" +#include "ngtcp2_bbr.h" +#include "ngtcp2_bbr2.h" +#include "ngtcp2_pv.h" +#include "ngtcp2_pmtud.h" +#include "ngtcp2_cid.h" +#include "ngtcp2_buf.h" +#include "ngtcp2_ppe.h" +#include "ngtcp2_qlog.h" +#include "ngtcp2_rst.h" + +typedef enum { + /* Client specific handshake states */ + NGTCP2_CS_CLIENT_INITIAL, + NGTCP2_CS_CLIENT_WAIT_HANDSHAKE, + NGTCP2_CS_CLIENT_TLS_HANDSHAKE_FAILED, + /* Server specific handshake states */ + NGTCP2_CS_SERVER_INITIAL, + NGTCP2_CS_SERVER_WAIT_HANDSHAKE, + NGTCP2_CS_SERVER_TLS_HANDSHAKE_FAILED, + /* Shared by both client and server */ + NGTCP2_CS_POST_HANDSHAKE, + NGTCP2_CS_CLOSING, + NGTCP2_CS_DRAINING, +} ngtcp2_conn_state; + +/* NGTCP2_MAX_STREAMS is the maximum number of streams. */ +#define NGTCP2_MAX_STREAMS (1LL << 60) + +/* NGTCP2_MAX_NUM_BUFFED_RX_PKTS is the maximum number of buffered + reordered packets. */ +#define NGTCP2_MAX_NUM_BUFFED_RX_PKTS 4 + +/* NGTCP2_MAX_REORDERED_CRYPTO_DATA is the maximum offset of crypto + data which is not continuous. In other words, there is a gap of + unreceived data. */ +#define NGTCP2_MAX_REORDERED_CRYPTO_DATA 65536 + +/* NGTCP2_MAX_RX_INITIAL_CRYPTO_DATA is the maximum offset of received + crypto stream in Initial packet. We set this hard limit here + because crypto stream is unbounded. */ +#define NGTCP2_MAX_RX_INITIAL_CRYPTO_DATA 65536 +/* NGTCP2_MAX_RX_HANDSHAKE_CRYPTO_DATA is the maximum offset of + received crypto stream in Handshake packet. We set this hard limit + here because crypto stream is unbounded. */ +#define NGTCP2_MAX_RX_HANDSHAKE_CRYPTO_DATA 65536 + +/* NGTCP2_MAX_RETRIES is the number of Retry packet which client can + accept. */ +#define NGTCP2_MAX_RETRIES 3 + +/* NGTCP2_MAX_BOUND_DCID_POOL_SIZE is the maximum number of + destination connection ID which have been bound to a particular + path, but not yet used as primary path and path validation is not + performed from the local endpoint. */ +#define NGTCP2_MAX_BOUND_DCID_POOL_SIZE 4 +/* NGTCP2_MAX_DCID_POOL_SIZE is the maximum number of destination + connection ID the remote endpoint provides to store. It must be + the power of 2. */ +#define NGTCP2_MAX_DCID_POOL_SIZE 8 +/* NGTCP2_MAX_DCID_RETIRED_SIZE is the maximum number of retired DCID + kept to catch in-flight packet on retired path. */ +#define NGTCP2_MAX_DCID_RETIRED_SIZE 2 +/* NGTCP2_MAX_SCID_POOL_SIZE is the maximum number of source + connection ID the local endpoint provides to the remote endpoint. + The chosen value was described in old draft. Now a remote endpoint + tells the maximum value. The value can be quite large, and we have + to put the sane limit.*/ +#define NGTCP2_MAX_SCID_POOL_SIZE 8 + +/* NGTCP2_MAX_NON_ACK_TX_PKT is the maximum number of continuous non + ACK-eliciting packets. */ +#define NGTCP2_MAX_NON_ACK_TX_PKT 3 + +/* NGTCP2_ECN_MAX_NUM_VALIDATION_PKTS is the maximum number of ECN marked + packets sent in NGTCP2_ECN_STATE_TESTING period. */ +#define NGTCP2_ECN_MAX_NUM_VALIDATION_PKTS 10 + +/* NGTCP2_CONNECTION_CLOSE_ERROR_MAX_REASONLEN is the maximum length + of reason phrase to remember. If the received reason phrase is + longer than this value, it is truncated. */ +#define NGTCP2_CONNECTION_CLOSE_ERROR_MAX_REASONLEN 1024 + +/* NGTCP2_WRITE_PKT_FLAG_NONE indicates that no flag is set. */ +#define NGTCP2_WRITE_PKT_FLAG_NONE 0x00u +/* NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING indicates that packet other + than Initial packet should be padded. Initial packet might be + padded based on QUIC requirement regardless of this flag. */ +#define NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING 0x01u +/* NGTCP2_WRITE_PKT_FLAG_MORE indicates that more frames might come + and it should be encoded into the current packet. */ +#define NGTCP2_WRITE_PKT_FLAG_MORE 0x02u + +/* + * ngtcp2_max_frame is defined so that it covers the largest ACK + * frame. + */ +typedef union ngtcp2_max_frame { + ngtcp2_frame fr; + struct { + ngtcp2_ack ack; + /* ack includes 1 ngtcp2_ack_range. */ + ngtcp2_ack_range ranges[NGTCP2_MAX_ACK_RANGES - 1]; + } ackfr; +} ngtcp2_max_frame; + +typedef struct ngtcp2_path_challenge_entry { + ngtcp2_path_storage ps; + uint8_t data[8]; +} ngtcp2_path_challenge_entry; + +void ngtcp2_path_challenge_entry_init(ngtcp2_path_challenge_entry *pcent, + const ngtcp2_path *path, + const uint8_t *data); + +/* NGTCP2_CONN_FLAG_NONE indicates that no flag is set. */ +#define NGTCP2_CONN_FLAG_NONE 0x00u +/* NGTCP2_CONN_FLAG_HANDSHAKE_COMPLETED is set when TLS stack declares + that TLS handshake has completed. The condition of this + declaration varies between TLS implementations and this flag does + not indicate the completion of QUIC handshake. Some + implementations declare TLS handshake completion as server when + they write off Server Finished and before deriving application rx + secret. */ +#define NGTCP2_CONN_FLAG_HANDSHAKE_COMPLETED 0x01u +/* NGTCP2_CONN_FLAG_CONN_ID_NEGOTIATED is set if connection ID is + negotiated. This is only used for client. */ +#define NGTCP2_CONN_FLAG_CONN_ID_NEGOTIATED 0x02u +/* NGTCP2_CONN_FLAG_TRANSPORT_PARAM_RECVED is set if transport + parameters are received. */ +#define NGTCP2_CONN_FLAG_TRANSPORT_PARAM_RECVED 0x04u +/* NGTCP2_CONN_FLAG_LOCAL_TRANSPORT_PARAMS_COMMITTED is set when a + local transport parameters are applied. */ +#define NGTCP2_CONN_FLAG_LOCAL_TRANSPORT_PARAMS_COMMITTED 0x08u +/* NGTCP2_CONN_FLAG_RECV_RETRY is set when a client receives Retry + packet. */ +#define NGTCP2_CONN_FLAG_RECV_RETRY 0x10u +/* NGTCP2_CONN_FLAG_EARLY_DATA_REJECTED is set when 0-RTT packet is + rejected by a peer. */ +#define NGTCP2_CONN_FLAG_EARLY_DATA_REJECTED 0x20u +/* NGTCP2_CONN_FLAG_KEEP_ALIVE_CANCELLED is set when the expired + keep-alive timer has been cancelled. */ +#define NGTCP2_CONN_FLAG_KEEP_ALIVE_CANCELLED 0x40u +/* NGTCP2_CONN_FLAG_HANDSHAKE_CONFIRMED is set when an endpoint + confirmed completion of handshake. */ +#define NGTCP2_CONN_FLAG_HANDSHAKE_CONFIRMED 0x80u +/* NGTCP2_CONN_FLAG_HANDSHAKE_COMPLETED_HANDLED is set when the + library transitions its state to "post handshake". */ +#define NGTCP2_CONN_FLAG_HANDSHAKE_COMPLETED_HANDLED 0x0100u +/* NGTCP2_CONN_FLAG_HANDSHAKE_EARLY_RETRANSMIT is set when the early + handshake retransmission has done when server receives overlapping + Initial crypto data. */ +#define NGTCP2_CONN_FLAG_HANDSHAKE_EARLY_RETRANSMIT 0x0200u +/* NGTCP2_CONN_FLAG_CLEAR_FIXED_BIT indicates that the local endpoint + sends a QUIC packet without Fixed Bit set if a remote endpoint + supports Greasing QUIC Bit extension. */ +#define NGTCP2_CONN_FLAG_CLEAR_FIXED_BIT 0x0400u +/* NGTCP2_CONN_FLAG_KEY_UPDATE_NOT_CONFIRMED is set when key update is + not confirmed by the local endpoint. That is, it has not received + ACK frame which acknowledges packet which is encrypted with new + key. */ +#define NGTCP2_CONN_FLAG_KEY_UPDATE_NOT_CONFIRMED 0x0800u +/* NGTCP2_CONN_FLAG_PPE_PENDING is set when + NGTCP2_WRITE_STREAM_FLAG_MORE is used and the intermediate state of + ngtcp2_ppe is stored in pkt struct of ngtcp2_conn. */ +#define NGTCP2_CONN_FLAG_PPE_PENDING 0x1000u +/* NGTCP2_CONN_FLAG_RESTART_IDLE_TIMER_ON_WRITE is set when idle timer + should be restarted on next write. */ +#define NGTCP2_CONN_FLAG_RESTART_IDLE_TIMER_ON_WRITE 0x2000u +/* NGTCP2_CONN_FLAG_SERVER_ADDR_VERIFIED indicates that server as peer + verified client address. This flag is only used by client. */ +#define NGTCP2_CONN_FLAG_SERVER_ADDR_VERIFIED 0x4000u +/* NGTCP2_CONN_FLAG_EARLY_KEY_INSTALLED indicates that an early key is + installed. conn->early.ckm cannot be used for this purpose because + it might be discarded when a certain condition is met. */ +#define NGTCP2_CONN_FLAG_EARLY_KEY_INSTALLED 0x8000u +/* NGTCP2_CONN_FLAG_KEY_UPDATE_INITIATOR is set when the local + endpoint has initiated key update. */ +#define NGTCP2_CONN_FLAG_KEY_UPDATE_INITIATOR 0x10000u + +typedef struct ngtcp2_crypto_data { + ngtcp2_buf buf; + /* pkt_type is the type of packet to send data in buf. If it is 0, + it must be sent in Short packet. Otherwise, it is sent the long + packet type denoted by pkt_type. */ + uint8_t pkt_type; +} ngtcp2_crypto_data; + +typedef struct ngtcp2_pktns { + struct { + /* last_pkt_num is the packet number which the local endpoint sent + last time.*/ + int64_t last_pkt_num; + ngtcp2_frame_chain *frq; + /* num_non_ack_pkt is the number of continuous non ACK-eliciting + packets. */ + size_t num_non_ack_pkt; + + struct { + /* ect0 is the number of QUIC packets, not UDP datagram, which + are sent in UDP datagram with ECT0 marking. */ + size_t ect0; + /* start_pkt_num is the lowest packet number that are sent + during ECN validation period. */ + int64_t start_pkt_num; + /* validation_pkt_sent is the number of QUIC packets sent during + validation period. */ + size_t validation_pkt_sent; + /* validation_pkt_lost is the number of QUIC packets lost during + validation period. */ + size_t validation_pkt_lost; + } ecn; + } tx; + + struct { + /* pngap tracks received packet number in order to suppress + duplicated packet number. */ + ngtcp2_gaptr pngap; + /* max_pkt_num is the largest packet number received so far. */ + int64_t max_pkt_num; + /* max_pkt_ts is the timestamp when max_pkt_num packet is + received. */ + ngtcp2_tstamp max_pkt_ts; + /* max_ack_eliciting_pkt_num is the largest ack-eliciting packet + number received so far. */ + int64_t max_ack_eliciting_pkt_num; + /* + * buffed_pkts is buffered packets which cannot be decrypted with + * the current encryption level. + * + * In server Initial encryption level, 0-RTT packet may be buffered. + * In server Handshake encryption level, Short packet may be buffered. + * + * In client Initial encryption level, Handshake or Short packet may + * be buffered. In client Handshake encryption level, Short packet + * may be buffered. + * + * - 0-RTT packet is only buffered in server Initial encryption + * level ngtcp2_pktns. + * + * - Handshake packet is only buffered in client Handshake + * encryption level ngtcp2_pktns. + * + * - Short packet is only buffered in Short encryption level + * ngtcp2_pktns. + */ + ngtcp2_pkt_chain *buffed_pkts; + + struct { + /* ect0, ect1, and ce are the number of QUIC packets received + with those markings. */ + size_t ect0; + size_t ect1; + size_t ce; + struct { + /* ect0, ect1, ce are the ECN counts received in the latest + ACK frame. */ + uint64_t ect0; + uint64_t ect1; + uint64_t ce; + } ack; + } ecn; + } rx; + + struct { + struct { + /* frq contains crypto data sorted by their offset. */ + ngtcp2_ksl frq; + /* offset is the offset of crypto stream in this packet number + space. */ + uint64_t offset; + /* ckm is a cryptographic key, and iv to encrypt outgoing + packets. */ + ngtcp2_crypto_km *ckm; + /* hp_ctx is cipher context for packet header protection. */ + ngtcp2_crypto_cipher_ctx hp_ctx; + /* data is the submitted crypto data. */ + ngtcp2_buf_chain *data; + } tx; + + struct { + /* ckm is a cryptographic key, and iv to decrypt incoming + packets. */ + ngtcp2_crypto_km *ckm; + /* hp_ctx is cipher context for packet header protection. */ + ngtcp2_crypto_cipher_ctx hp_ctx; + } rx; + + ngtcp2_strm strm; + ngtcp2_crypto_ctx ctx; + } crypto; + + ngtcp2_acktr acktr; + ngtcp2_rtb rtb; +} ngtcp2_pktns; + +typedef enum ngtcp2_ecn_state { + NGTCP2_ECN_STATE_TESTING, + NGTCP2_ECN_STATE_UNKNOWN, + NGTCP2_ECN_STATE_FAILED, + NGTCP2_ECN_STATE_CAPABLE, +} ngtcp2_ecn_state; + +ngtcp2_static_ringbuf_def(dcid_bound, NGTCP2_MAX_BOUND_DCID_POOL_SIZE, + sizeof(ngtcp2_dcid)); +ngtcp2_static_ringbuf_def(dcid_unused, NGTCP2_MAX_DCID_POOL_SIZE, + sizeof(ngtcp2_dcid)); +ngtcp2_static_ringbuf_def(dcid_retired, NGTCP2_MAX_DCID_RETIRED_SIZE, + sizeof(ngtcp2_dcid)); +ngtcp2_static_ringbuf_def(path_challenge, 4, + sizeof(ngtcp2_path_challenge_entry)); + +ngtcp2_objalloc_def(strm, ngtcp2_strm, oplent); + +struct ngtcp2_conn { + ngtcp2_objalloc frc_objalloc; + ngtcp2_objalloc rtb_entry_objalloc; + ngtcp2_objalloc strm_objalloc; + ngtcp2_conn_state state; + ngtcp2_callbacks callbacks; + /* rcid is a connection ID present in Initial or 0-RTT packet from + client as destination connection ID. Server uses this field to + check that duplicated Initial or 0-RTT packet are indeed sent to + this connection. Client uses this field to validate + original_destination_connection_id transport parameter. */ + ngtcp2_cid rcid; + /* oscid is the source connection ID initially used by the local + endpoint. */ + ngtcp2_cid oscid; + /* retry_scid is the source connection ID from Retry packet. Client + records it in order to verify retry_source_connection_id + transport parameter. Server does not use this field. */ + ngtcp2_cid retry_scid; + ngtcp2_pktns *in_pktns; + ngtcp2_pktns *hs_pktns; + ngtcp2_pktns pktns; + + struct { + /* current is the current destination connection ID. */ + ngtcp2_dcid current; + /* bound is a set of destination connection IDs which are bound to + particular paths. These paths are not validated yet. */ + ngtcp2_static_ringbuf_dcid_bound bound; + /* unused is a set of unused CID received from peer. */ + ngtcp2_static_ringbuf_dcid_unused unused; + /* retired is a set of CID retired by local endpoint. Keep them + in 3*PTO to catch packets in flight along the old path. */ + ngtcp2_static_ringbuf_dcid_retired retired; + /* seqgap tracks received sequence numbers in order to ignore + retransmitted duplicated NEW_CONNECTION_ID frame. */ + ngtcp2_gaptr seqgap; + /* retire_prior_to is the largest retire_prior_to received so + far. */ + uint64_t retire_prior_to; + struct { + /* seqs contains sequence number of Connection ID whose + retirement is not acknowledged by the remote endpoint yet. */ + uint64_t seqs[NGTCP2_MAX_DCID_POOL_SIZE * 2]; + /* len is the number of sequence numbers that seq contains. */ + size_t len; + } retire_unacked; + /* zerolen_seq is a pseudo sequence number of zero-length + Destination Connection ID in order to distinguish between + them. */ + uint64_t zerolen_seq; + } dcid; + + struct { + /* set is a set of CID sent to peer. The peer can use any CIDs in + this set. This includes used CID as well as unused ones. */ + ngtcp2_ksl set; + /* used is a set of CID used by peer. The sort function of this + priority queue takes timestamp when CID is retired and sorts + them in ascending order. */ + ngtcp2_pq used; + /* last_seq is the last sequence number of connection ID. */ + uint64_t last_seq; + /* num_retired is the number of retired Connection ID still + included in set. */ + size_t num_retired; + } scid; + + struct { + /* strmq contains ngtcp2_strm which has frames to send. */ + ngtcp2_pq strmq; + /* strmq_nretrans is the number of entries in strmq which has + stream data to resent. */ + size_t strmq_nretrans; + /* ack is ACK frame. The underlying buffer is reused. */ + ngtcp2_frame *ack; + /* max_ack_ranges is the number of additional ngtcp2_ack_range + which ack can contain. */ + size_t max_ack_ranges; + /* offset is the offset the local endpoint has sent to the remote + endpoint. */ + uint64_t offset; + /* max_offset is the maximum offset that local endpoint can + send. */ + uint64_t max_offset; + /* last_max_data_ts is the timestamp when last MAX_DATA frame is + sent. */ + ngtcp2_tstamp last_max_data_ts; + + struct { + /* state is the state of ECN validation */ + ngtcp2_ecn_state state; + /* validation_start_ts is the timestamp when ECN validation is + started. It is UINT64_MAX if it has not started yet. */ + ngtcp2_tstamp validation_start_ts; + /* dgram_sent is the number of UDP datagram sent during ECN + validation period. */ + size_t dgram_sent; + } ecn; + + struct { + /* pktlen is the number of bytes written before calling + ngtcp2_conn_update_pkt_tx_time which resets this field to + 0. */ + size_t pktlen; + /* next_ts is the time to send next packet. It is UINT64_MAX if + packet pacing is disabled or expired.*/ + ngtcp2_tstamp next_ts; + } pacing; + } tx; + + struct { + /* unsent_max_offset is the maximum offset that remote endpoint + can send without extending MAX_DATA. This limit is not yet + notified to the remote endpoint. */ + uint64_t unsent_max_offset; + /* offset is the cumulative sum of stream data received for this + connection. */ + uint64_t offset; + /* max_offset is the maximum offset that remote endpoint can + send. */ + uint64_t max_offset; + /* window is the connection-level flow control window size. */ + uint64_t window; + /* path_challenge stores received PATH_CHALLENGE data. */ + ngtcp2_static_ringbuf_path_challenge path_challenge; + /* ccerr is the received connection close error. */ + ngtcp2_connection_close_error ccerr; + } rx; + + struct { + ngtcp2_crypto_km *ckm; + ngtcp2_crypto_cipher_ctx hp_ctx; + ngtcp2_crypto_ctx ctx; + /* discard_started_ts is the timestamp when the timer to discard + early key has started. Used by server only. */ + ngtcp2_tstamp discard_started_ts; + /* transport_params is the values remembered by client from the + previous session. These are set by + ngtcp2_conn_set_early_remote_transport_params(). Server does + not use this field. Server must not set values for these + parameters that are smaller than the remembered values. */ + struct { + uint64_t initial_max_streams_bidi; + uint64_t initial_max_streams_uni; + uint64_t initial_max_stream_data_bidi_local; + uint64_t initial_max_stream_data_bidi_remote; + uint64_t initial_max_stream_data_uni; + uint64_t initial_max_data; + uint64_t active_connection_id_limit; + uint64_t max_datagram_frame_size; + } transport_params; + } early; + + struct { + ngtcp2_settings settings; + /* transport_params is the local transport parameters. It is used + for Short packet only. */ + ngtcp2_transport_params transport_params; + struct { + /* max_streams is the maximum number of bidirectional streams which + the local endpoint can open. */ + uint64_t max_streams; + /* next_stream_id is the bidirectional stream ID which the local + endpoint opens next. */ + int64_t next_stream_id; + } bidi; + + struct { + /* max_streams is the maximum number of unidirectional streams + which the local endpoint can open. */ + uint64_t max_streams; + /* next_stream_id is the unidirectional stream ID which the + local endpoint opens next. */ + int64_t next_stream_id; + } uni; + } local; + + struct { + /* transport_params is the received transport parameters during + handshake. It is used for Short packet only. */ + ngtcp2_transport_params *transport_params; + /* pending_transport_params is received transport parameters + during handshake. It is copied to transport_params when 1RTT + key is available. */ + ngtcp2_transport_params *pending_transport_params; + struct { + ngtcp2_idtr idtr; + /* unsent_max_streams is the maximum number of streams of peer + initiated bidirectional stream which the local endpoint can + accept. This limit is not yet notified to the remote + endpoint. */ + uint64_t unsent_max_streams; + /* max_streams is the maximum number of streams of peer + initiated bidirectional stream which the local endpoint can + accept. */ + uint64_t max_streams; + } bidi; + + struct { + ngtcp2_idtr idtr; + /* unsent_max_streams is the maximum number of streams of peer + initiated unidirectional stream which the local endpoint can + accept. This limit is not yet notified to the remote + endpoint. */ + uint64_t unsent_max_streams; + /* max_streams is the maximum number of streams of peer + initiated unidirectional stream which the local endpoint can + accept. */ + uint64_t max_streams; + } uni; + } remote; + + struct { + struct { + /* new_tx_ckm is a new sender 1RTT key which has not been + used. */ + ngtcp2_crypto_km *new_tx_ckm; + /* new_rx_ckm is a new receiver 1RTT key which has not + successfully decrypted incoming packet yet. */ + ngtcp2_crypto_km *new_rx_ckm; + /* old_rx_ckm is an old receiver 1RTT key. */ + ngtcp2_crypto_km *old_rx_ckm; + /* confirmed_ts is the time instant when the key update is + confirmed by the local endpoint last time. UINT64_MAX means + undefined value. */ + ngtcp2_tstamp confirmed_ts; + } key_update; + + /* tls_native_handle is a native handle to TLS session object. */ + void *tls_native_handle; + /* decrypt_hp_buf is a buffer which is used to write unprotected + packet header. */ + ngtcp2_vec decrypt_hp_buf; + /* decrypt_buf is a buffer which is used to write decrypted data. */ + ngtcp2_vec decrypt_buf; + /* retry_aead is AEAD to verify Retry packet integrity. It is + used by client only. */ + ngtcp2_crypto_aead retry_aead; + /* retry_aead_ctx is AEAD cipher context to verify Retry packet + integrity. It is used by client only. */ + ngtcp2_crypto_aead_ctx retry_aead_ctx; + /* tls_error is TLS related error. */ + int tls_error; + /* tls_alert is TLS alert generated by the local endpoint. */ + uint8_t tls_alert; + /* decryption_failure_count is the number of received packets that + fail authentication. */ + uint64_t decryption_failure_count; + } crypto; + + /* pkt contains the packet intermediate construction data to support + NGTCP2_WRITE_STREAM_FLAG_MORE */ + struct { + ngtcp2_crypto_cc cc; + ngtcp2_pkt_hd hd; + ngtcp2_ppe ppe; + ngtcp2_frame_chain **pfrc; + int pkt_empty; + int hd_logged; + /* flags is bitwise OR of zero or more of + NGTCP2_RTB_ENTRY_FLAG_*. */ + uint16_t rtb_entry_flags; + ngtcp2_ssize hs_spktlen; + int require_padding; + } pkt; + + struct { + /* last_ts is a timestamp when a last packet is sent or received + on a current path. */ + ngtcp2_tstamp last_ts; + /* timeout is keep-alive timeout. When it expires, a packet + should be sent to a current path to keep connection alive. It + might be used to keep NAT binding intact. If 0 is set, + keep-alive timer is disabled. */ + ngtcp2_duration timeout; + } keep_alive; + + struct { + /* Initial keys for negotiated version. If original version == + negotiated version, these fields are not used. */ + struct { + ngtcp2_crypto_km *ckm; + ngtcp2_crypto_cipher_ctx hp_ctx; + } rx; + struct { + ngtcp2_crypto_km *ckm; + ngtcp2_crypto_cipher_ctx hp_ctx; + } tx; + /* version is QUIC version that the above Initial keys are created + for. */ + uint32_t version; + /* preferred_versions is the array of versions that are preferred + by the local endpoint. Server negotiates one of those versions + in this array if a client initially selects a less preferred + version. Client uses this field and original_version field to + prevent version downgrade attack if it reacted upon Version + Negotiation packet. */ + uint32_t *preferred_versions; + /* preferred_versionslen is the number of versions stored in the + array pointed by preferred_versions. This field is only used + by server. */ + size_t preferred_versionslen; + /* available_versions is the versions that the local endpoint + sends in version_information transport parameter. This is the + wire image of available_versions field of version_information + transport parameter. */ + uint8_t *available_versions; + /* available_versionslen is the length of data pointed by + available_versions field. */ + size_t available_versionslen; + } vneg; + + ngtcp2_map strms; + ngtcp2_conn_stat cstat; + ngtcp2_pv *pv; + ngtcp2_pmtud *pmtud; + ngtcp2_log log; + ngtcp2_qlog qlog; + ngtcp2_rst rst; + ngtcp2_cc_algo cc_algo; + ngtcp2_cc cc; + const ngtcp2_mem *mem; + /* idle_ts is the time instant when idle timer started. */ + ngtcp2_tstamp idle_ts; + void *user_data; + uint32_t client_chosen_version; + uint32_t negotiated_version; + /* flags is bitwise OR of zero or more of NGTCP2_CONN_FLAG_*. */ + uint32_t flags; + int server; +}; + +typedef enum ngtcp2_vmsg_type { + NGTCP2_VMSG_TYPE_STREAM, + NGTCP2_VMSG_TYPE_DATAGRAM, +} ngtcp2_vmsg_type; + +typedef struct ngtcp2_vmsg_stream { + /* strm is a stream that data is sent to. */ + ngtcp2_strm *strm; + /* flags is bitwise OR of zero or more of + NGTCP2_WRITE_STREAM_FLAG_*. */ + uint32_t flags; + /* data is the pointer to ngtcp2_vec array which contains the stream + data to send. */ + const ngtcp2_vec *data; + /* datacnt is the number of ngtcp2_vec pointed by data. */ + size_t datacnt; + /* pdatalen is the pointer to the variable which the number of bytes + written is assigned to if pdatalen is not NULL. */ + ngtcp2_ssize *pdatalen; +} ngtcp2_vmsg_stream; + +typedef struct ngtcp2_vmsg_datagram { + /* data is the pointer to ngtcp2_vec array which contains the data + to send. */ + const ngtcp2_vec *data; + /* datacnt is the number of ngtcp2_vec pointed by data. */ + size_t datacnt; + /* dgram_id is an opaque identifier chosen by an application. */ + uint64_t dgram_id; + /* flags is bitwise OR of zero or more of + NGTCP2_WRITE_DATAGRAM_FLAG_*. */ + uint32_t flags; + /* paccepted is the pointer to the variable which, if it is not + NULL, is assigned nonzero if data is written to a packet. */ + int *paccepted; +} ngtcp2_vmsg_datagram; + +typedef struct ngtcp2_vmsg { + ngtcp2_vmsg_type type; + union { + ngtcp2_vmsg_stream stream; + ngtcp2_vmsg_datagram datagram; + }; +} ngtcp2_vmsg; + +/* + * ngtcp2_conn_sched_ack stores packet number |pkt_num| and its + * reception timestamp |ts| in order to send its ACK. + * + * It returns 0 if it succeeds, or one of the following negative error + * codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory + * NGTCP2_ERR_PROTO + * Same packet number has already been added. + */ +int ngtcp2_conn_sched_ack(ngtcp2_conn *conn, ngtcp2_acktr *acktr, + int64_t pkt_num, int active_ack, ngtcp2_tstamp ts); + +/* + * ngtcp2_conn_find_stream returns a stream whose stream ID is + * |stream_id|. If no such stream is found, it returns NULL. + */ +ngtcp2_strm *ngtcp2_conn_find_stream(ngtcp2_conn *conn, int64_t stream_id); + +/* + * conn_init_stream initializes |strm|. Its stream ID is |stream_id|. + * This function adds |strm| to conn->strms. |strm| must be allocated + * by the caller. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory + * NGTCP2_ERR_CALLBACK_FAILURE + * User-callback function failed. + */ +int ngtcp2_conn_init_stream(ngtcp2_conn *conn, ngtcp2_strm *strm, + int64_t stream_id, void *stream_user_data); + +/* + * ngtcp2_conn_close_stream closes stream |strm|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_INVALID_ARGUMENT + * Stream is not found. + * NGTCP2_ERR_CALLBACK_FAILURE + * User-defined callback function failed. + */ +int ngtcp2_conn_close_stream(ngtcp2_conn *conn, ngtcp2_strm *strm); + +/* + * ngtcp2_conn_close_stream closes stream |strm| if no further + * transmission and reception are allowed, and all reordered incoming + * data are emitted to the application, and the transmitted data are + * acked. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_INVALID_ARGUMENT + * Stream is not found. + * NGTCP2_ERR_CALLBACK_FAILURE + * User-defined callback function failed. + */ +int ngtcp2_conn_close_stream_if_shut_rdwr(ngtcp2_conn *conn, ngtcp2_strm *strm); + +/* + * ngtcp2_conn_update_rtt updates RTT measurements. |rtt| is a latest + * RTT which is not adjusted by ack delay. |ack_delay| is unscaled + * ack_delay included in ACK frame. |ack_delay| is actually tainted + * (sent by peer), so don't assume that |ack_delay| is always smaller + * than, or equals to |rtt|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_INVALID_ARGUMENT + * RTT sample is ignored. + */ +int ngtcp2_conn_update_rtt(ngtcp2_conn *conn, ngtcp2_duration rtt, + ngtcp2_duration ack_delay, ngtcp2_tstamp ts); + +void ngtcp2_conn_set_loss_detection_timer(ngtcp2_conn *conn, ngtcp2_tstamp ts); + +int ngtcp2_conn_on_loss_detection_timer(ngtcp2_conn *conn, ngtcp2_tstamp ts); + +/* + * ngtcp2_conn_detect_lost_pkt detects lost packets. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +int ngtcp2_conn_detect_lost_pkt(ngtcp2_conn *conn, ngtcp2_pktns *pktns, + ngtcp2_conn_stat *cstat, ngtcp2_tstamp ts); + +/* + * ngtcp2_conn_tx_strmq_top returns the ngtcp2_strm which sits on the + * top of queue. tx_strmq must not be empty. + */ +ngtcp2_strm *ngtcp2_conn_tx_strmq_top(ngtcp2_conn *conn); + +/* + * ngtcp2_conn_tx_strmq_pop pops the ngtcp2_strm from the queue. + * tx_strmq must not be empty. + */ +void ngtcp2_conn_tx_strmq_pop(ngtcp2_conn *conn); + +/* + * ngtcp2_conn_tx_strmq_push pushes |strm| into tx_strmq. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +int ngtcp2_conn_tx_strmq_push(ngtcp2_conn *conn, ngtcp2_strm *strm); + +/* + * ngtcp2_conn_internal_expiry returns the minimum expiry time among + * all timers in |conn|. + */ +ngtcp2_tstamp ngtcp2_conn_internal_expiry(ngtcp2_conn *conn); + +ngtcp2_ssize ngtcp2_conn_write_vmsg(ngtcp2_conn *conn, ngtcp2_path *path, + int pkt_info_version, ngtcp2_pkt_info *pi, + uint8_t *dest, size_t destlen, + ngtcp2_vmsg *vmsg, ngtcp2_tstamp ts); + +/* + * ngtcp2_conn_write_single_frame_pkt writes a packet which contains + * |fr| frame only in the buffer pointed by |dest| whose length if + * |destlen|. |type| is a long packet type to send. If |type| is 0, + * Short packet is used. |dcid| is used as a destination connection + * ID. |flags| is zero or more of NGTCP2_WRITE_PKT_FLAG_*. Only + * NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING is recognized. + * + * The packet written by this function will not be retransmitted. + * + * This function returns the number of bytes written in |dest| if it + * succeeds, or one of the following negative error codes: + * + * NGTCP2_ERR_CALLBACK_FAILURE + * User-defined callback function failed. + */ +ngtcp2_ssize ngtcp2_conn_write_single_frame_pkt( + ngtcp2_conn *conn, ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, + uint8_t type, uint8_t flags, const ngtcp2_cid *dcid, ngtcp2_frame *fr, + uint16_t rtb_entry_flags, const ngtcp2_path *path, ngtcp2_tstamp ts); + +/* + * ngtcp2_conn_commit_local_transport_params commits the local + * transport parameters, which is currently set to + * conn->local.settings.transport_params. This function will do some + * amends on transport parameters for adjusting default values. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + * NGTCP2_ERR_INVALID_ARGUMENT + * CID in preferred address equals to the original SCID. + */ +int ngtcp2_conn_commit_local_transport_params(ngtcp2_conn *conn); + +/* + * ngtcp2_conn_lost_pkt_expiry returns the earliest expiry time of + * lost packet. + */ +ngtcp2_tstamp ngtcp2_conn_lost_pkt_expiry(ngtcp2_conn *conn); + +/* + * ngtcp2_conn_remove_lost_pkt removes the expired lost packet. + */ +void ngtcp2_conn_remove_lost_pkt(ngtcp2_conn *conn, ngtcp2_tstamp ts); + +/* + * ngtcp2_conn_resched_frames reschedules frames linked from |*pfrc| + * for retransmission. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +int ngtcp2_conn_resched_frames(ngtcp2_conn *conn, ngtcp2_pktns *pktns, + ngtcp2_frame_chain **pfrc); + +uint64_t ngtcp2_conn_tx_strmq_first_cycle(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_ack_delay_expiry` returns the expiry time point of + * delayed protected ACK. One should call + * `ngtcp2_conn_cancel_expired_ack_delay_timer` and + * `ngtcp2_conn_write_pkt` (or `ngtcp2_conn_writev_stream`) when it + * expires. It returns UINT64_MAX if there is no expiry. + */ +ngtcp2_tstamp ngtcp2_conn_ack_delay_expiry(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_cancel_expired_ack_delay_timer` stops expired ACK + * delay timer. |ts| is the current time. This function must be + * called when `ngtcp2_conn_ack_delay_expiry` <= ts. + */ +void ngtcp2_conn_cancel_expired_ack_delay_timer(ngtcp2_conn *conn, + ngtcp2_tstamp ts); + +/** + * @function + * + * `ngtcp2_conn_loss_detection_expiry` returns the expiry time point + * of loss detection timer. One should call + * `ngtcp2_conn_on_loss_detection_timer` and `ngtcp2_conn_write_pkt` + * (or `ngtcp2_conn_writev_stream`) when it expires. It returns + * UINT64_MAX if loss detection timer is not armed. + */ +ngtcp2_tstamp ngtcp2_conn_loss_detection_expiry(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_get_idle_expiry` returns the time when a connection + * should be closed if it continues to be idle. If idle timeout is + * disabled, this function returns ``UINT64_MAX``. + */ +ngtcp2_tstamp ngtcp2_conn_get_idle_expiry(ngtcp2_conn *conn); + +ngtcp2_duration ngtcp2_conn_compute_pto(ngtcp2_conn *conn, ngtcp2_pktns *pktns); + +/* + * ngtcp2_conn_track_retired_dcid_seq tracks the sequence number |seq| + * of unacknowledged retiring Destination Connection ID. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_CONNECTION_ID_LIMIT + * The number of unacknowledged retirement exceeds the limit. + */ +int ngtcp2_conn_track_retired_dcid_seq(ngtcp2_conn *conn, uint64_t seq); + +/* + * ngtcp2_conn_untrack_retired_dcid_seq deletes the sequence number + * |seq| of unacknowledged retiring Destination Connection ID. It is + * fine if such sequence number is not found. + */ +void ngtcp2_conn_untrack_retired_dcid_seq(ngtcp2_conn *conn, uint64_t seq); + +/* + * ngtcp2_conn_server_negotiate_version negotiates QUIC version. It + * is compatible version negotiation. It returns the negotiated QUIC + * version. This function must not be called by client. + */ +uint32_t +ngtcp2_conn_server_negotiate_version(ngtcp2_conn *conn, + const ngtcp2_version_info *version_info); + +/** + * @function + * + * `ngtcp2_conn_write_connection_close_pkt` writes a packet which + * contains a CONNECTION_CLOSE frame (type 0x1c) in the buffer pointed + * by |dest| whose capacity is |datalen|. + * + * If |path| is not ``NULL``, this function stores the network path + * with which the packet should be sent. Each addr field must point + * to the buffer which should be at least ``sizeof(struct + * sockaddr_storage)`` bytes long. The assignment might not be done + * if nothing is written to |dest|. + * + * If |pi| is not ``NULL``, this function stores packet metadata in it + * if it succeeds. The metadata includes ECN markings. + * + * This function must not be called from inside the callback + * functions. + * + * At the moment, successful call to this function makes connection + * close. We may change this behaviour in the future to allow + * graceful shutdown. + * + * This function returns the number of bytes written in |dest| if it + * succeeds, or one of the following negative error codes: + * + * :macro:`NGTCP2_ERR_NOMEM` + * Out of memory + * :macro:`NGTCP2_ERR_NOBUF` + * Buffer is too small + * :macro:`NGTCP2_ERR_INVALID_STATE` + * The current state does not allow sending CONNECTION_CLOSE. + * :macro:`NGTCP2_ERR_PKT_NUM_EXHAUSTED` + * Packet number is exhausted, and cannot send any more packet. + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` + * User callback failed + */ +ngtcp2_ssize ngtcp2_conn_write_connection_close_pkt( + ngtcp2_conn *conn, ngtcp2_path *path, ngtcp2_pkt_info *pi, uint8_t *dest, + size_t destlen, uint64_t error_code, const uint8_t *reason, + size_t reasonlen, ngtcp2_tstamp ts); + +/** + * @function + * + * `ngtcp2_conn_write_application_close_pkt` writes a packet which + * contains a CONNECTION_CLOSE frame (type 0x1d) in the buffer pointed + * by |dest| whose capacity is |datalen|. + * + * If |path| is not ``NULL``, this function stores the network path + * with which the packet should be sent. Each addr field must point + * to the buffer which should be at least ``sizeof(struct + * sockaddr_storage)`` bytes long. The assignment might not be done + * if nothing is written to |dest|. + * + * If |pi| is not ``NULL``, this function stores packet metadata in it + * if it succeeds. The metadata includes ECN markings. + * + * If handshake has not been confirmed yet, CONNECTION_CLOSE (type + * 0x1c) with error code :macro:`NGTCP2_APPLICATION_ERROR` is written + * instead. + * + * This function must not be called from inside the callback + * functions. + * + * At the moment, successful call to this function makes connection + * close. We may change this behaviour in the future to allow + * graceful shutdown. + * + * This function returns the number of bytes written in |dest| if it + * succeeds, or one of the following negative error codes: + * + * :macro:`NGTCP2_ERR_NOMEM` + * Out of memory + * :macro:`NGTCP2_ERR_NOBUF` + * Buffer is too small + * :macro:`NGTCP2_ERR_INVALID_STATE` + * The current state does not allow sending CONNECTION_CLOSE. + * :macro:`NGTCP2_ERR_PKT_NUM_EXHAUSTED` + * Packet number is exhausted, and cannot send any more packet. + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` + * User callback failed + */ +ngtcp2_ssize ngtcp2_conn_write_application_close_pkt( + ngtcp2_conn *conn, ngtcp2_path *path, ngtcp2_pkt_info *pi, uint8_t *dest, + size_t destlen, uint64_t app_error_code, const uint8_t *reason, + size_t reasonlen, ngtcp2_tstamp ts); + +int ngtcp2_conn_start_pmtud(ngtcp2_conn *conn); + +void ngtcp2_conn_stop_pmtud(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_set_remote_transport_params` sets transport parameter + * |params| from a remote endpoint to |conn|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * :macro:`NGTCP2_ERR_TRANSPORT_PARAM` + * Failed to validate a remote transport parameters. + * :macro:`NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE` + * Version negotiation failure. + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` + * User callback failed + * :macro:`NGTCP2_ERR_NOMEM` + * Out of memory. + */ +int ngtcp2_conn_set_remote_transport_params( + ngtcp2_conn *conn, const ngtcp2_transport_params *params); + +#endif /* NGTCP2_CONN_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_conv.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_conv.c new file mode 100644 index 0000000..3367217 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_conv.c @@ -0,0 +1,291 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_conv.h" + +#include <string.h> +#include <assert.h> + +#include "ngtcp2_str.h" +#include "ngtcp2_pkt.h" +#include "ngtcp2_net.h" +#include "ngtcp2_unreachable.h" + +const uint8_t *ngtcp2_get_uint64(uint64_t *dest, const uint8_t *p) { + uint64_t n; + memcpy(&n, p, sizeof(n)); + *dest = ngtcp2_ntohl64(n); + return p + sizeof(n); +} + +const uint8_t *ngtcp2_get_uint48(uint64_t *dest, const uint8_t *p) { + uint64_t n = 0; + memcpy(((uint8_t *)&n) + 2, p, 6); + *dest = ngtcp2_ntohl64(n); + return p + 6; +} + +const uint8_t *ngtcp2_get_uint32(uint32_t *dest, const uint8_t *p) { + uint32_t n; + memcpy(&n, p, sizeof(n)); + *dest = ngtcp2_ntohl(n); + return p + sizeof(n); +} + +const uint8_t *ngtcp2_get_uint24(uint32_t *dest, const uint8_t *p) { + uint32_t n = 0; + memcpy(((uint8_t *)&n) + 1, p, 3); + *dest = ngtcp2_ntohl(n); + return p + 3; +} + +const uint8_t *ngtcp2_get_uint16(uint16_t *dest, const uint8_t *p) { + uint16_t n; + memcpy(&n, p, sizeof(n)); + *dest = ngtcp2_ntohs(n); + return p + sizeof(n); +} + +const uint8_t *ngtcp2_get_uint16be(uint16_t *dest, const uint8_t *p) { + memcpy(dest, p, sizeof(*dest)); + return p + sizeof(*dest); +} + +static uint64_t get_uvarint(size_t *plen, const uint8_t *p) { + union { + uint8_t n8; + uint16_t n16; + uint32_t n32; + uint64_t n64; + } n; + + *plen = (size_t)(1u << (*p >> 6)); + + switch (*plen) { + case 1: + return *p; + case 2: + memcpy(&n, p, 2); + n.n8 &= 0x3f; + return ngtcp2_ntohs(n.n16); + case 4: + memcpy(&n, p, 4); + n.n8 &= 0x3f; + return ngtcp2_ntohl(n.n32); + case 8: + memcpy(&n, p, 8); + n.n8 &= 0x3f; + return ngtcp2_ntohl64(n.n64); + default: + ngtcp2_unreachable(); + } +} + +const uint8_t *ngtcp2_get_uvarint(uint64_t *dest, const uint8_t *p) { + size_t len; + + *dest = get_uvarint(&len, p); + + return p + len; +} + +const uint8_t *ngtcp2_get_varint(int64_t *dest, const uint8_t *p) { + size_t len; + + *dest = (int64_t)get_uvarint(&len, p); + + return p + len; +} + +int64_t ngtcp2_get_pkt_num(const uint8_t *p, size_t pkt_numlen) { + uint32_t l; + uint16_t s; + + switch (pkt_numlen) { + case 1: + return *p; + case 2: + ngtcp2_get_uint16(&s, p); + return (int64_t)s; + case 3: + ngtcp2_get_uint24(&l, p); + return (int64_t)l; + case 4: + ngtcp2_get_uint32(&l, p); + return (int64_t)l; + default: + ngtcp2_unreachable(); + } +} + +uint8_t *ngtcp2_put_uint64be(uint8_t *p, uint64_t n) { + n = ngtcp2_htonl64(n); + return ngtcp2_cpymem(p, (const uint8_t *)&n, sizeof(n)); +} + +uint8_t *ngtcp2_put_uint48be(uint8_t *p, uint64_t n) { + n = ngtcp2_htonl64(n); + return ngtcp2_cpymem(p, ((const uint8_t *)&n) + 2, 6); +} + +uint8_t *ngtcp2_put_uint32be(uint8_t *p, uint32_t n) { + n = ngtcp2_htonl(n); + return ngtcp2_cpymem(p, (const uint8_t *)&n, sizeof(n)); +} + +uint8_t *ngtcp2_put_uint24be(uint8_t *p, uint32_t n) { + n = ngtcp2_htonl(n); + return ngtcp2_cpymem(p, ((const uint8_t *)&n) + 1, 3); +} + +uint8_t *ngtcp2_put_uint16be(uint8_t *p, uint16_t n) { + n = ngtcp2_htons(n); + return ngtcp2_cpymem(p, (const uint8_t *)&n, sizeof(n)); +} + +uint8_t *ngtcp2_put_uint16(uint8_t *p, uint16_t n) { + return ngtcp2_cpymem(p, (const uint8_t *)&n, sizeof(n)); +} + +uint8_t *ngtcp2_put_uvarint(uint8_t *p, uint64_t n) { + uint8_t *rv; + if (n < 64) { + *p++ = (uint8_t)n; + return p; + } + if (n < 16384) { + rv = ngtcp2_put_uint16be(p, (uint16_t)n); + *p |= 0x40; + return rv; + } + if (n < 1073741824) { + rv = ngtcp2_put_uint32be(p, (uint32_t)n); + *p |= 0x80; + return rv; + } + assert(n < 4611686018427387904ULL); + rv = ngtcp2_put_uint64be(p, n); + *p |= 0xc0; + return rv; +} + +uint8_t *ngtcp2_put_uvarint30(uint8_t *p, uint32_t n) { + uint8_t *rv; + + assert(n < 1073741824); + + rv = ngtcp2_put_uint32be(p, n); + *p |= 0x80; + + return rv; +} + +uint8_t *ngtcp2_put_pkt_num(uint8_t *p, int64_t pkt_num, size_t len) { + switch (len) { + case 1: + *p++ = (uint8_t)pkt_num; + return p; + case 2: + ngtcp2_put_uint16be(p, (uint16_t)pkt_num); + return p + 2; + case 3: + ngtcp2_put_uint24be(p, (uint32_t)pkt_num); + return p + 3; + case 4: + ngtcp2_put_uint32be(p, (uint32_t)pkt_num); + return p + 4; + default: + ngtcp2_unreachable(); + } +} + +size_t ngtcp2_get_uvarintlen(const uint8_t *p) { + return (size_t)(1u << (*p >> 6)); +} + +size_t ngtcp2_put_uvarintlen(uint64_t n) { + if (n < 64) { + return 1; + } + if (n < 16384) { + return 2; + } + if (n < 1073741824) { + return 4; + } + assert(n < 4611686018427387904ULL); + return 8; +} + +int64_t ngtcp2_nth_server_bidi_id(uint64_t n) { + if (n == 0) { + return 0; + } + + if ((NGTCP2_MAX_VARINT >> 2) < n - 1) { + return NGTCP2_MAX_SERVER_STREAM_ID_BIDI; + } + + return (int64_t)(((n - 1) << 2) | 0x01); +} + +int64_t ngtcp2_nth_client_bidi_id(uint64_t n) { + if (n == 0) { + return 0; + } + + if ((NGTCP2_MAX_VARINT >> 2) < n - 1) { + return NGTCP2_MAX_CLIENT_STREAM_ID_BIDI; + } + + return (int64_t)((n - 1) << 2); +} + +int64_t ngtcp2_nth_server_uni_id(uint64_t n) { + if (n == 0) { + return 0; + } + + if ((NGTCP2_MAX_VARINT >> 2) < n - 1) { + return NGTCP2_MAX_SERVER_STREAM_ID_UNI; + } + + return (int64_t)(((n - 1) << 2) | 0x03); +} + +int64_t ngtcp2_nth_client_uni_id(uint64_t n) { + if (n == 0) { + return 0; + } + + if ((NGTCP2_MAX_VARINT >> 2) < n - 1) { + return NGTCP2_MAX_CLIENT_STREAM_ID_UNI; + } + + return (int64_t)(((n - 1) << 2) | 0x02); +} + +uint64_t ngtcp2_ord_stream_id(int64_t stream_id) { + return (uint64_t)(stream_id >> 2) + 1; +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_conv.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_conv.h new file mode 100644 index 0000000..ef089a9 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_conv.h @@ -0,0 +1,208 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_CONV_H +#define NGTCP2_CONV_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +/* + * ngtcp2_get_uint64 reads 8 bytes from |p| as 64 bits unsigned + * integer encoded as network byte order, and stores it in the buffer + * pointed by |dest| in host byte order. It returns |p| + 8. + */ +const uint8_t *ngtcp2_get_uint64(uint64_t *dest, const uint8_t *p); + +/* + * ngtcp2_get_uint48 reads 6 bytes from |p| as 48 bits unsigned + * integer encoded as network byte order, and stores it in the buffer + * pointed by |dest| in host byte order. It returns |p| + 6. + */ +const uint8_t *ngtcp2_get_uint48(uint64_t *dest, const uint8_t *p); + +/* + * ngtcp2_get_uint32 reads 4 bytes from |p| as 32 bits unsigned + * integer encoded as network byte order, and stores it in the buffer + * pointed by |dest| in host byte order. It returns |p| + 4. + */ +const uint8_t *ngtcp2_get_uint32(uint32_t *dest, const uint8_t *p); + +/* + * ngtcp2_get_uint24 reads 3 bytes from |p| as 24 bits unsigned + * integer encoded as network byte order, and stores it in the buffer + * pointed by |dest| in host byte order. It returns |p| + 3. + */ +const uint8_t *ngtcp2_get_uint24(uint32_t *dest, const uint8_t *p); + +/* + * ngtcp2_get_uint16 reads 2 bytes from |p| as 16 bits unsigned + * integer encoded as network byte order, and stores it in the buffer + * pointed by |dest| in host byte order. It returns |p| + 2. + */ +const uint8_t *ngtcp2_get_uint16(uint16_t *dest, const uint8_t *p); + +/* + * ngtcp2_get_uint16be reads 2 bytes from |p| as 16 bits unsigned + * integer encoded as network byte order, and stores it in the buffer + * pointed by |dest| as is. It returns |p| + 2. + */ +const uint8_t *ngtcp2_get_uint16be(uint16_t *dest, const uint8_t *p); + +/* + * ngtcp2_get_uvarint reads variable-length unsigned integer from |p|, + * and stores it in the buffer pointed by |dest| in host byte order. + * It returns |p| plus the number of bytes read from |p|. + */ +const uint8_t *ngtcp2_get_uvarint(uint64_t *dest, const uint8_t *p); + +/* + * ngtcp2_get_varint reads variable-length unsigned integer from |p|, + * and casts it to the signed integer, and stores it in the buffer + * pointed by |dest| in host byte order. No information should be + * lost in this cast, because the variable-length integer is 62 + * bits. It returns |p| plus the number of bytes read from |p|. + */ +const uint8_t *ngtcp2_get_varint(int64_t *dest, const uint8_t *p); + +/* + * ngtcp2_get_pkt_num reads encoded packet number from |p|. The + * packet number is encoed in |pkt_numlen| bytes. + */ +int64_t ngtcp2_get_pkt_num(const uint8_t *p, size_t pkt_numlen); + +/* + * ngtcp2_put_uint64be writes |n| in host byte order in |p| in network + * byte order. It returns the one beyond of the last written + * position. + */ +uint8_t *ngtcp2_put_uint64be(uint8_t *p, uint64_t n); + +/* + * ngtcp2_put_uint48be writes |n| in host byte order in |p| in network + * byte order. It writes only least significant 48 bits. It returns + * the one beyond of the last written position. + */ +uint8_t *ngtcp2_put_uint48be(uint8_t *p, uint64_t n); + +/* + * ngtcp2_put_uint32be writes |n| in host byte order in |p| in network + * byte order. It returns the one beyond of the last written + * position. + */ +uint8_t *ngtcp2_put_uint32be(uint8_t *p, uint32_t n); + +/* + * ngtcp2_put_uint24be writes |n| in host byte order in |p| in network + * byte order. It writes only least significant 24 bits. It returns + * the one beyond of the last written position. + */ +uint8_t *ngtcp2_put_uint24be(uint8_t *p, uint32_t n); + +/* + * ngtcp2_put_uint16be writes |n| in host byte order in |p| in network + * byte order. It returns the one beyond of the last written + * position. + */ +uint8_t *ngtcp2_put_uint16be(uint8_t *p, uint16_t n); + +/* + * ngtcp2_put_uint16 writes |n| as is in |p|. It returns the one + * beyond of the last written position. + */ +uint8_t *ngtcp2_put_uint16(uint8_t *p, uint16_t n); + +/* + * ngtcp2_put_uvarint writes |n| in |p| using variable-length integer + * encoding. It returns the one beyond of the last written position. + */ +uint8_t *ngtcp2_put_uvarint(uint8_t *p, uint64_t n); + +/* + * ngtcp2_put_uvarint30 writes |n| in |p| using variable-length + * integer encoding. |n| must be strictly less than 1073741824. The + * function always encodes |n| in 4 bytes. It returns the one beyond + * of the last written position. + */ +uint8_t *ngtcp2_put_uvarint30(uint8_t *p, uint32_t n); + +/* + * ngtcp2_put_pkt_num encodes |pkt_num| using |len| bytes. It + * returns the one beyond of the last written position. + */ +uint8_t *ngtcp2_put_pkt_num(uint8_t *p, int64_t pkt_num, size_t len); + +/* + * ngtcp2_get_uvarintlen returns the required number of bytes to read + * variable-length integer starting at |p|. + */ +size_t ngtcp2_get_uvarintlen(const uint8_t *p); + +/* + * ngtcp2_put_uvarintlen returns the required number of bytes to + * encode |n|. + */ +size_t ngtcp2_put_uvarintlen(uint64_t n); + +/* + * ngtcp2_nth_server_bidi_id returns |n|-th server bidirectional + * stream ID. If |n| is 0, it returns 0. If the |n|-th stream ID is + * larger than NGTCP2_MAX_SERVER_STREAM_ID_BIDI, this function returns + * NGTCP2_MAX_SERVER_STREAM_ID_BIDI. + */ +int64_t ngtcp2_nth_server_bidi_id(uint64_t n); + +/* + * ngtcp2_nth_client_bidi_id returns |n|-th client bidirectional + * stream ID. If |n| is 0, it returns 0. If the |n|-th stream ID is + * larger than NGTCP2_MAX_CLIENT_STREAM_ID_BIDI, this function returns + * NGTCP2_MAX_CLIENT_STREAM_ID_BIDI. + */ +int64_t ngtcp2_nth_client_bidi_id(uint64_t n); + +/* + * ngtcp2_nth_server_uni_id returns |n|-th server unidirectional + * stream ID. If |n| is 0, it returns 0. If the |n|-th stream ID is + * larger than NGTCP2_MAX_SERVER_STREAM_ID_UNI, this function returns + * NGTCP2_MAX_SERVER_STREAM_ID_UNI. + */ +int64_t ngtcp2_nth_server_uni_id(uint64_t n); + +/* + * ngtcp2_nth_client_uni_id returns |n|-th client unidirectional + * stream ID. If |n| is 0, it returns 0. If the |n|-th stream ID is + * larger than NGTCP2_MAX_CLIENT_STREAM_ID_UNI, this function returns + * NGTCP2_MAX_CLIENT_STREAM_ID_UNI. + */ +int64_t ngtcp2_nth_client_uni_id(uint64_t n); + +/* + * ngtcp2_ord_stream_id returns the ordinal number of |stream_id|. + */ +uint64_t ngtcp2_ord_stream_id(int64_t stream_id); + +#endif /* NGTCP2_CONV_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_crypto.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_crypto.c new file mode 100644 index 0000000..24210a2 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_crypto.c @@ -0,0 +1,895 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_crypto.h" + +#include <string.h> +#include <assert.h> + +#include "ngtcp2_str.h" +#include "ngtcp2_conv.h" +#include "ngtcp2_conn.h" +#include "ngtcp2_net.h" + +int ngtcp2_crypto_km_new(ngtcp2_crypto_km **pckm, const uint8_t *secret, + size_t secretlen, + const ngtcp2_crypto_aead_ctx *aead_ctx, + const uint8_t *iv, size_t ivlen, + const ngtcp2_mem *mem) { + int rv = ngtcp2_crypto_km_nocopy_new(pckm, secretlen, ivlen, mem); + if (rv != 0) { + return rv; + } + + if (secretlen) { + memcpy((*pckm)->secret.base, secret, secretlen); + } + if (aead_ctx) { + (*pckm)->aead_ctx = *aead_ctx; + } + memcpy((*pckm)->iv.base, iv, ivlen); + + return 0; +} + +int ngtcp2_crypto_km_nocopy_new(ngtcp2_crypto_km **pckm, size_t secretlen, + size_t ivlen, const ngtcp2_mem *mem) { + size_t len; + uint8_t *p; + + len = sizeof(ngtcp2_crypto_km) + secretlen + ivlen; + + *pckm = ngtcp2_mem_malloc(mem, len); + if (*pckm == NULL) { + return NGTCP2_ERR_NOMEM; + } + + p = (uint8_t *)(*pckm) + sizeof(ngtcp2_crypto_km); + (*pckm)->secret.base = p; + (*pckm)->secret.len = secretlen; + p += secretlen; + (*pckm)->iv.base = p; + (*pckm)->iv.len = ivlen; + (*pckm)->aead_ctx.native_handle = NULL; + (*pckm)->pkt_num = -1; + (*pckm)->use_count = 0; + (*pckm)->flags = NGTCP2_CRYPTO_KM_FLAG_NONE; + + return 0; +} + +void ngtcp2_crypto_km_del(ngtcp2_crypto_km *ckm, const ngtcp2_mem *mem) { + if (ckm == NULL) { + return; + } + + ngtcp2_mem_free(mem, ckm); +} + +void ngtcp2_crypto_create_nonce(uint8_t *dest, const uint8_t *iv, size_t ivlen, + int64_t pkt_num) { + size_t i; + uint64_t n; + + assert(ivlen >= 8); + + memcpy(dest, iv, ivlen); + n = ngtcp2_htonl64((uint64_t)pkt_num); + + for (i = 0; i < 8; ++i) { + dest[ivlen - 8 + i] ^= ((uint8_t *)&n)[i]; + } +} + +/* + * varint_paramlen returns the length of a single transport parameter + * which has variable integer in its parameter. + */ +static size_t varint_paramlen(ngtcp2_transport_param_id id, uint64_t param) { + size_t valuelen = ngtcp2_put_uvarintlen(param); + return ngtcp2_put_uvarintlen(id) + ngtcp2_put_uvarintlen(valuelen) + valuelen; +} + +/* + * write_varint_param writes parameter |id| of the given |value| in + * varint encoding. It returns p + the number of bytes written. + */ +static uint8_t *write_varint_param(uint8_t *p, ngtcp2_transport_param_id id, + uint64_t value) { + p = ngtcp2_put_uvarint(p, id); + p = ngtcp2_put_uvarint(p, ngtcp2_put_uvarintlen(value)); + return ngtcp2_put_uvarint(p, value); +} + +/* + * cid_paramlen returns the length of a single transport parameter + * which has |cid| as value. + */ +static size_t cid_paramlen(ngtcp2_transport_param_id id, + const ngtcp2_cid *cid) { + return ngtcp2_put_uvarintlen(id) + ngtcp2_put_uvarintlen(cid->datalen) + + cid->datalen; +} + +/* + * write_cid_param writes parameter |id| of the given |cid|. It + * returns p + the number of bytes written. + */ +static uint8_t *write_cid_param(uint8_t *p, ngtcp2_transport_param_id id, + const ngtcp2_cid *cid) { + assert(cid->datalen == 0 || cid->datalen >= NGTCP2_MIN_CIDLEN); + assert(cid->datalen <= NGTCP2_MAX_CIDLEN); + + p = ngtcp2_put_uvarint(p, id); + p = ngtcp2_put_uvarint(p, cid->datalen); + if (cid->datalen) { + p = ngtcp2_cpymem(p, cid->data, cid->datalen); + } + return p; +} + +static const uint8_t empty_address[16]; + +ngtcp2_ssize ngtcp2_encode_transport_params_versioned( + uint8_t *dest, size_t destlen, ngtcp2_transport_params_type exttype, + int transport_params_version, const ngtcp2_transport_params *params) { + uint8_t *p; + size_t len = 0; + /* For some reason, gcc 7.3.0 requires this initialization. */ + size_t preferred_addrlen = 0; + size_t version_infolen = 0; + const ngtcp2_sockaddr_in *sa_in; + const ngtcp2_sockaddr_in6 *sa_in6; + (void)transport_params_version; + + switch (exttype) { + case NGTCP2_TRANSPORT_PARAMS_TYPE_CLIENT_HELLO: + break; + case NGTCP2_TRANSPORT_PARAMS_TYPE_ENCRYPTED_EXTENSIONS: + len += + cid_paramlen(NGTCP2_TRANSPORT_PARAM_ORIGINAL_DESTINATION_CONNECTION_ID, + ¶ms->original_dcid); + + if (params->stateless_reset_token_present) { + len += + ngtcp2_put_uvarintlen(NGTCP2_TRANSPORT_PARAM_STATELESS_RESET_TOKEN) + + ngtcp2_put_uvarintlen(NGTCP2_STATELESS_RESET_TOKENLEN) + + NGTCP2_STATELESS_RESET_TOKENLEN; + } + if (params->preferred_address_present) { + assert(params->preferred_address.cid.datalen >= NGTCP2_MIN_CIDLEN); + assert(params->preferred_address.cid.datalen <= NGTCP2_MAX_CIDLEN); + preferred_addrlen = 4 /* ipv4Address */ + 2 /* ipv4Port */ + + 16 /* ipv6Address */ + 2 /* ipv6Port */ + + 1 + + params->preferred_address.cid.datalen /* CID */ + + NGTCP2_STATELESS_RESET_TOKENLEN; + len += ngtcp2_put_uvarintlen(NGTCP2_TRANSPORT_PARAM_PREFERRED_ADDRESS) + + ngtcp2_put_uvarintlen(preferred_addrlen) + preferred_addrlen; + } + if (params->retry_scid_present) { + len += cid_paramlen(NGTCP2_TRANSPORT_PARAM_RETRY_SOURCE_CONNECTION_ID, + ¶ms->retry_scid); + } + break; + default: + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + len += cid_paramlen(NGTCP2_TRANSPORT_PARAM_INITIAL_SOURCE_CONNECTION_ID, + ¶ms->initial_scid); + + if (params->initial_max_stream_data_bidi_local) { + len += varint_paramlen( + NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_BIDI_LOCAL, + params->initial_max_stream_data_bidi_local); + } + if (params->initial_max_stream_data_bidi_remote) { + len += varint_paramlen( + NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_BIDI_REMOTE, + params->initial_max_stream_data_bidi_remote); + } + if (params->initial_max_stream_data_uni) { + len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_UNI, + params->initial_max_stream_data_uni); + } + if (params->initial_max_data) { + len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_DATA, + params->initial_max_data); + } + if (params->initial_max_streams_bidi) { + len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAMS_BIDI, + params->initial_max_streams_bidi); + } + if (params->initial_max_streams_uni) { + len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAMS_UNI, + params->initial_max_streams_uni); + } + if (params->max_udp_payload_size != + NGTCP2_DEFAULT_MAX_RECV_UDP_PAYLOAD_SIZE) { + len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_MAX_UDP_PAYLOAD_SIZE, + params->max_udp_payload_size); + } + if (params->ack_delay_exponent != NGTCP2_DEFAULT_ACK_DELAY_EXPONENT) { + len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_ACK_DELAY_EXPONENT, + params->ack_delay_exponent); + } + if (params->disable_active_migration) { + len += + ngtcp2_put_uvarintlen(NGTCP2_TRANSPORT_PARAM_DISABLE_ACTIVE_MIGRATION) + + ngtcp2_put_uvarintlen(0); + } + if (params->max_ack_delay != NGTCP2_DEFAULT_MAX_ACK_DELAY) { + len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_MAX_ACK_DELAY, + params->max_ack_delay / NGTCP2_MILLISECONDS); + } + if (params->max_idle_timeout) { + len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_MAX_IDLE_TIMEOUT, + params->max_idle_timeout / NGTCP2_MILLISECONDS); + } + if (params->active_connection_id_limit && + params->active_connection_id_limit != + NGTCP2_DEFAULT_ACTIVE_CONNECTION_ID_LIMIT) { + len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_ACTIVE_CONNECTION_ID_LIMIT, + params->active_connection_id_limit); + } + if (params->max_datagram_frame_size) { + len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_MAX_DATAGRAM_FRAME_SIZE, + params->max_datagram_frame_size); + } + if (params->grease_quic_bit) { + len += ngtcp2_put_uvarintlen(NGTCP2_TRANSPORT_PARAM_GREASE_QUIC_BIT) + + ngtcp2_put_uvarintlen(0); + } + if (params->version_info_present) { + version_infolen = + sizeof(uint32_t) + params->version_info.available_versionslen; + len += ngtcp2_put_uvarintlen(NGTCP2_TRANSPORT_PARAM_VERSION_INFORMATION) + + ngtcp2_put_uvarintlen(version_infolen) + version_infolen; + } + + if (dest == NULL && destlen == 0) { + return (ngtcp2_ssize)len; + } + + if (destlen < len) { + return NGTCP2_ERR_NOBUF; + } + + p = dest; + + if (exttype == NGTCP2_TRANSPORT_PARAMS_TYPE_ENCRYPTED_EXTENSIONS) { + p = write_cid_param( + p, NGTCP2_TRANSPORT_PARAM_ORIGINAL_DESTINATION_CONNECTION_ID, + ¶ms->original_dcid); + + if (params->stateless_reset_token_present) { + p = ngtcp2_put_uvarint(p, NGTCP2_TRANSPORT_PARAM_STATELESS_RESET_TOKEN); + p = ngtcp2_put_uvarint(p, sizeof(params->stateless_reset_token)); + p = ngtcp2_cpymem(p, params->stateless_reset_token, + sizeof(params->stateless_reset_token)); + } + if (params->preferred_address_present) { + p = ngtcp2_put_uvarint(p, NGTCP2_TRANSPORT_PARAM_PREFERRED_ADDRESS); + p = ngtcp2_put_uvarint(p, preferred_addrlen); + + if (params->preferred_address.ipv4_present) { + sa_in = ¶ms->preferred_address.ipv4; + p = ngtcp2_cpymem(p, &sa_in->sin_addr, sizeof(sa_in->sin_addr)); + p = ngtcp2_put_uint16(p, sa_in->sin_port); + } else { + p = ngtcp2_cpymem(p, empty_address, sizeof(sa_in->sin_addr)); + p = ngtcp2_put_uint16(p, 0); + } + + if (params->preferred_address.ipv6_present) { + sa_in6 = ¶ms->preferred_address.ipv6; + p = ngtcp2_cpymem(p, &sa_in6->sin6_addr, sizeof(sa_in6->sin6_addr)); + p = ngtcp2_put_uint16(p, sa_in6->sin6_port); + } else { + p = ngtcp2_cpymem(p, empty_address, sizeof(sa_in6->sin6_addr)); + p = ngtcp2_put_uint16(p, 0); + } + + *p++ = (uint8_t)params->preferred_address.cid.datalen; + if (params->preferred_address.cid.datalen) { + p = ngtcp2_cpymem(p, params->preferred_address.cid.data, + params->preferred_address.cid.datalen); + } + p = ngtcp2_cpymem( + p, params->preferred_address.stateless_reset_token, + sizeof(params->preferred_address.stateless_reset_token)); + } + if (params->retry_scid_present) { + p = write_cid_param(p, NGTCP2_TRANSPORT_PARAM_RETRY_SOURCE_CONNECTION_ID, + ¶ms->retry_scid); + } + } + + p = write_cid_param(p, NGTCP2_TRANSPORT_PARAM_INITIAL_SOURCE_CONNECTION_ID, + ¶ms->initial_scid); + + if (params->initial_max_stream_data_bidi_local) { + p = write_varint_param( + p, NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_BIDI_LOCAL, + params->initial_max_stream_data_bidi_local); + } + + if (params->initial_max_stream_data_bidi_remote) { + p = write_varint_param( + p, NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_BIDI_REMOTE, + params->initial_max_stream_data_bidi_remote); + } + + if (params->initial_max_stream_data_uni) { + p = write_varint_param(p, + NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_UNI, + params->initial_max_stream_data_uni); + } + + if (params->initial_max_data) { + p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_DATA, + params->initial_max_data); + } + + if (params->initial_max_streams_bidi) { + p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAMS_BIDI, + params->initial_max_streams_bidi); + } + + if (params->initial_max_streams_uni) { + p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAMS_UNI, + params->initial_max_streams_uni); + } + + if (params->max_udp_payload_size != + NGTCP2_DEFAULT_MAX_RECV_UDP_PAYLOAD_SIZE) { + p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_MAX_UDP_PAYLOAD_SIZE, + params->max_udp_payload_size); + } + + if (params->ack_delay_exponent != NGTCP2_DEFAULT_ACK_DELAY_EXPONENT) { + p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_ACK_DELAY_EXPONENT, + params->ack_delay_exponent); + } + + if (params->disable_active_migration) { + p = ngtcp2_put_uvarint(p, NGTCP2_TRANSPORT_PARAM_DISABLE_ACTIVE_MIGRATION); + p = ngtcp2_put_uvarint(p, 0); + } + + if (params->max_ack_delay != NGTCP2_DEFAULT_MAX_ACK_DELAY) { + p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_MAX_ACK_DELAY, + params->max_ack_delay / NGTCP2_MILLISECONDS); + } + + if (params->max_idle_timeout) { + p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_MAX_IDLE_TIMEOUT, + params->max_idle_timeout / NGTCP2_MILLISECONDS); + } + + if (params->active_connection_id_limit && + params->active_connection_id_limit != + NGTCP2_DEFAULT_ACTIVE_CONNECTION_ID_LIMIT) { + p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_ACTIVE_CONNECTION_ID_LIMIT, + params->active_connection_id_limit); + } + + if (params->max_datagram_frame_size) { + p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_MAX_DATAGRAM_FRAME_SIZE, + params->max_datagram_frame_size); + } + + if (params->grease_quic_bit) { + p = ngtcp2_put_uvarint(p, NGTCP2_TRANSPORT_PARAM_GREASE_QUIC_BIT); + p = ngtcp2_put_uvarint(p, 0); + } + + if (params->version_info_present) { + p = ngtcp2_put_uvarint(p, NGTCP2_TRANSPORT_PARAM_VERSION_INFORMATION); + p = ngtcp2_put_uvarint(p, version_infolen); + p = ngtcp2_put_uint32be(p, params->version_info.chosen_version); + if (params->version_info.available_versionslen) { + p = ngtcp2_cpymem(p, params->version_info.available_versions, + params->version_info.available_versionslen); + } + } + + assert((size_t)(p - dest) == len); + + return (ngtcp2_ssize)len; +} + +/* + * decode_varint decodes a single varint from the buffer pointed by + * |*pp| of length |end - *pp|. If it decodes an integer + * successfully, it stores the integer in |*pdest|, increment |*pp| by + * the number of bytes read from |*pp|, and returns 0. Otherwise it + * returns -1. + */ +static int decode_varint(uint64_t *pdest, const uint8_t **pp, + const uint8_t *end) { + const uint8_t *p = *pp; + size_t len; + + if (p == end) { + return -1; + } + + len = ngtcp2_get_uvarintlen(p); + if ((uint64_t)(end - p) < len) { + return -1; + } + + *pp = ngtcp2_get_uvarint(pdest, p); + + return 0; +} + +/* + * decode_varint_param decodes length prefixed value from the buffer + * pointed by |*pp| of length |end - *pp|. The length and value are + * encoded in varint form. If it decodes a value successfully, it + * stores the value in |*pdest|, increment |*pp| by the number of + * bytes read from |*pp|, and returns 0. Otherwise it returns -1. + */ +static int decode_varint_param(uint64_t *pdest, const uint8_t **pp, + const uint8_t *end) { + const uint8_t *p = *pp; + uint64_t valuelen; + + if (decode_varint(&valuelen, &p, end) != 0) { + return -1; + } + + if (p == end) { + return -1; + } + + if ((uint64_t)(end - p) < valuelen) { + return -1; + } + + if (ngtcp2_get_uvarintlen(p) != valuelen) { + return -1; + } + + *pp = ngtcp2_get_uvarint(pdest, p); + + return 0; +} + +/* + * decode_cid_param decodes length prefixed ngtcp2_cid from the buffer + * pointed by |*pp| of length |end - *pp|. The length is encoded in + * varint form. If it decodes a value successfully, it stores the + * value in |*pdest|, increment |*pp| by the number of read from + * |*pp|, and returns the number of bytes read. Otherwise it returns + * the one of the negative error code: + * + * NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM + * Could not decode Connection ID. + */ +static int decode_cid_param(ngtcp2_cid *pdest, const uint8_t **pp, + const uint8_t *end) { + const uint8_t *p = *pp; + uint64_t valuelen; + + if (decode_varint(&valuelen, &p, end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + + if ((valuelen != 0 && valuelen < NGTCP2_MIN_CIDLEN) || + valuelen > NGTCP2_MAX_CIDLEN || (size_t)(end - p) < valuelen) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + + ngtcp2_cid_init(pdest, p, (size_t)valuelen); + + p += valuelen; + + *pp = p; + + return 0; +} + +int ngtcp2_decode_transport_params_versioned( + int transport_params_version, ngtcp2_transport_params *params, + ngtcp2_transport_params_type exttype, const uint8_t *data, size_t datalen) { + const uint8_t *p, *end, *lend; + size_t len; + uint64_t param_type; + uint64_t valuelen; + int rv; + int initial_scid_present = 0; + int original_dcid_present = 0; + ngtcp2_sockaddr_in *sa_in; + ngtcp2_sockaddr_in6 *sa_in6; + uint32_t version; + + (void)transport_params_version; + + if (datalen == 0) { + return NGTCP2_ERR_REQUIRED_TRANSPORT_PARAM; + } + + /* Set default values */ + memset(params, 0, sizeof(*params)); + params->initial_max_streams_bidi = 0; + params->initial_max_streams_uni = 0; + params->initial_max_stream_data_bidi_local = 0; + params->initial_max_stream_data_bidi_remote = 0; + params->initial_max_stream_data_uni = 0; + params->max_udp_payload_size = NGTCP2_DEFAULT_MAX_RECV_UDP_PAYLOAD_SIZE; + params->ack_delay_exponent = NGTCP2_DEFAULT_ACK_DELAY_EXPONENT; + params->stateless_reset_token_present = 0; + params->preferred_address_present = 0; + params->disable_active_migration = 0; + params->max_ack_delay = NGTCP2_DEFAULT_MAX_ACK_DELAY; + params->max_idle_timeout = 0; + params->active_connection_id_limit = + NGTCP2_DEFAULT_ACTIVE_CONNECTION_ID_LIMIT; + params->retry_scid_present = 0; + params->max_datagram_frame_size = 0; + memset(¶ms->retry_scid, 0, sizeof(params->retry_scid)); + memset(¶ms->initial_scid, 0, sizeof(params->initial_scid)); + memset(¶ms->original_dcid, 0, sizeof(params->original_dcid)); + params->version_info_present = 0; + + p = data; + end = data + datalen; + + for (; (size_t)(end - p) >= 2;) { + if (decode_varint(¶m_type, &p, end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + + switch (param_type) { + case NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_BIDI_LOCAL: + if (decode_varint_param(¶ms->initial_max_stream_data_bidi_local, &p, + end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + break; + case NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_BIDI_REMOTE: + if (decode_varint_param(¶ms->initial_max_stream_data_bidi_remote, &p, + end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + break; + case NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_UNI: + if (decode_varint_param(¶ms->initial_max_stream_data_uni, &p, end) != + 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + break; + case NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_DATA: + if (decode_varint_param(¶ms->initial_max_data, &p, end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + break; + case NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAMS_BIDI: + if (decode_varint_param(¶ms->initial_max_streams_bidi, &p, end) != + 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + if (params->initial_max_streams_bidi > NGTCP2_MAX_STREAMS) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + break; + case NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAMS_UNI: + if (decode_varint_param(¶ms->initial_max_streams_uni, &p, end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + if (params->initial_max_streams_uni > NGTCP2_MAX_STREAMS) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + break; + case NGTCP2_TRANSPORT_PARAM_MAX_IDLE_TIMEOUT: + if (decode_varint_param(¶ms->max_idle_timeout, &p, end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + params->max_idle_timeout *= NGTCP2_MILLISECONDS; + break; + case NGTCP2_TRANSPORT_PARAM_MAX_UDP_PAYLOAD_SIZE: + if (decode_varint_param(¶ms->max_udp_payload_size, &p, end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + break; + case NGTCP2_TRANSPORT_PARAM_STATELESS_RESET_TOKEN: + if (exttype != NGTCP2_TRANSPORT_PARAMS_TYPE_ENCRYPTED_EXTENSIONS) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + if (decode_varint(&valuelen, &p, end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + if ((size_t)valuelen != sizeof(params->stateless_reset_token)) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + if ((size_t)(end - p) < sizeof(params->stateless_reset_token)) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + + p = ngtcp2_get_bytes(params->stateless_reset_token, p, + sizeof(params->stateless_reset_token)); + params->stateless_reset_token_present = 1; + + break; + case NGTCP2_TRANSPORT_PARAM_ACK_DELAY_EXPONENT: + if (decode_varint_param(¶ms->ack_delay_exponent, &p, end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + if (params->ack_delay_exponent > 20) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + break; + case NGTCP2_TRANSPORT_PARAM_PREFERRED_ADDRESS: + if (exttype != NGTCP2_TRANSPORT_PARAMS_TYPE_ENCRYPTED_EXTENSIONS) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + if (decode_varint(&valuelen, &p, end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + if ((size_t)(end - p) < valuelen) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + len = 4 /* ipv4Address */ + 2 /* ipv4Port */ + 16 /* ipv6Address */ + + 2 /* ipv6Port */ + + 1 /* cid length */ + NGTCP2_STATELESS_RESET_TOKENLEN; + if (valuelen < len) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + + sa_in = ¶ms->preferred_address.ipv4; + + p = ngtcp2_get_bytes(&sa_in->sin_addr, p, sizeof(sa_in->sin_addr)); + p = ngtcp2_get_uint16be(&sa_in->sin_port, p); + + if (sa_in->sin_port || memcmp(empty_address, &sa_in->sin_addr, + sizeof(sa_in->sin_addr)) != 0) { + sa_in->sin_family = AF_INET; + params->preferred_address.ipv4_present = 1; + } + + sa_in6 = ¶ms->preferred_address.ipv6; + + p = ngtcp2_get_bytes(&sa_in6->sin6_addr, p, sizeof(sa_in6->sin6_addr)); + p = ngtcp2_get_uint16be(&sa_in6->sin6_port, p); + + if (sa_in6->sin6_port || memcmp(empty_address, &sa_in6->sin6_addr, + sizeof(sa_in6->sin6_addr)) != 0) { + sa_in6->sin6_family = AF_INET6; + params->preferred_address.ipv6_present = 1; + } + + /* cid */ + params->preferred_address.cid.datalen = *p++; + len += params->preferred_address.cid.datalen; + if (valuelen != len || + params->preferred_address.cid.datalen > NGTCP2_MAX_CIDLEN || + params->preferred_address.cid.datalen < NGTCP2_MIN_CIDLEN) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + if (params->preferred_address.cid.datalen) { + p = ngtcp2_get_bytes(params->preferred_address.cid.data, p, + params->preferred_address.cid.datalen); + } + + /* stateless reset token */ + p = ngtcp2_get_bytes( + params->preferred_address.stateless_reset_token, p, + sizeof(params->preferred_address.stateless_reset_token)); + params->preferred_address_present = 1; + break; + case NGTCP2_TRANSPORT_PARAM_DISABLE_ACTIVE_MIGRATION: + if (decode_varint(&valuelen, &p, end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + if (valuelen != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + params->disable_active_migration = 1; + break; + case NGTCP2_TRANSPORT_PARAM_ORIGINAL_DESTINATION_CONNECTION_ID: + if (exttype != NGTCP2_TRANSPORT_PARAMS_TYPE_ENCRYPTED_EXTENSIONS) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + rv = decode_cid_param(¶ms->original_dcid, &p, end); + if (rv != 0) { + return rv; + } + original_dcid_present = 1; + break; + case NGTCP2_TRANSPORT_PARAM_RETRY_SOURCE_CONNECTION_ID: + if (exttype != NGTCP2_TRANSPORT_PARAMS_TYPE_ENCRYPTED_EXTENSIONS) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + rv = decode_cid_param(¶ms->retry_scid, &p, end); + if (rv != 0) { + return rv; + } + params->retry_scid_present = 1; + break; + case NGTCP2_TRANSPORT_PARAM_INITIAL_SOURCE_CONNECTION_ID: + rv = decode_cid_param(¶ms->initial_scid, &p, end); + if (rv != 0) { + return rv; + } + initial_scid_present = 1; + break; + case NGTCP2_TRANSPORT_PARAM_MAX_ACK_DELAY: + if (decode_varint_param(¶ms->max_ack_delay, &p, end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + if (params->max_ack_delay >= 16384) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + params->max_ack_delay *= NGTCP2_MILLISECONDS; + break; + case NGTCP2_TRANSPORT_PARAM_ACTIVE_CONNECTION_ID_LIMIT: + if (decode_varint_param(¶ms->active_connection_id_limit, &p, end) != + 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + break; + case NGTCP2_TRANSPORT_PARAM_MAX_DATAGRAM_FRAME_SIZE: + if (decode_varint_param(¶ms->max_datagram_frame_size, &p, end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + break; + case NGTCP2_TRANSPORT_PARAM_GREASE_QUIC_BIT: + if (decode_varint(&valuelen, &p, end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + if (valuelen != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + params->grease_quic_bit = 1; + break; + case NGTCP2_TRANSPORT_PARAM_VERSION_INFORMATION: + if (decode_varint(&valuelen, &p, end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + if ((size_t)(end - p) < valuelen) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + if (valuelen < sizeof(uint32_t) || (valuelen & 0x3)) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + p = ngtcp2_get_uint32(¶ms->version_info.chosen_version, p); + if (params->version_info.chosen_version == 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + if (valuelen > sizeof(uint32_t)) { + params->version_info.available_versions = (uint8_t *)p; + params->version_info.available_versionslen = + (size_t)valuelen - sizeof(uint32_t); + + for (lend = p + (valuelen - sizeof(uint32_t)); p != lend;) { + p = ngtcp2_get_uint32(&version, p); + if (version == 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + } + } + params->version_info_present = 1; + break; + default: + /* Ignore unknown parameter */ + if (decode_varint(&valuelen, &p, end) != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + if ((size_t)(end - p) < valuelen) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + p += valuelen; + break; + } + } + + if (end - p != 0) { + return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM; + } + + if (!initial_scid_present || + (exttype == NGTCP2_TRANSPORT_PARAMS_TYPE_ENCRYPTED_EXTENSIONS && + !original_dcid_present)) { + return NGTCP2_ERR_REQUIRED_TRANSPORT_PARAM; + } + + return 0; +} + +static int transport_params_copy_new(ngtcp2_transport_params **pdest, + const ngtcp2_transport_params *src, + const ngtcp2_mem *mem) { + size_t len = sizeof(**pdest); + ngtcp2_transport_params *dest; + uint8_t *p; + + if (src->version_info_present) { + len += src->version_info.available_versionslen; + } + + dest = ngtcp2_mem_malloc(mem, len); + if (dest == NULL) { + return NGTCP2_ERR_NOMEM; + } + + *dest = *src; + + if (src->version_info_present && src->version_info.available_versionslen) { + p = (uint8_t *)dest + sizeof(*dest); + memcpy(p, src->version_info.available_versions, + src->version_info.available_versionslen); + dest->version_info.available_versions = p; + } + + *pdest = dest; + + return 0; +} + +int ngtcp2_decode_transport_params_new(ngtcp2_transport_params **pparams, + ngtcp2_transport_params_type exttype, + const uint8_t *data, size_t datalen, + const ngtcp2_mem *mem) { + int rv; + ngtcp2_transport_params params; + + rv = ngtcp2_decode_transport_params(¶ms, exttype, data, datalen); + if (rv < 0) { + return rv; + } + + if (mem == NULL) { + mem = ngtcp2_mem_default(); + } + + return transport_params_copy_new(pparams, ¶ms, mem); +} + +void ngtcp2_transport_params_del(ngtcp2_transport_params *params, + const ngtcp2_mem *mem) { + if (params == NULL) { + return; + } + + if (mem == NULL) { + mem = ngtcp2_mem_default(); + } + + ngtcp2_mem_free(mem, params); +} + +int ngtcp2_transport_params_copy_new(ngtcp2_transport_params **pdest, + const ngtcp2_transport_params *src, + const ngtcp2_mem *mem) { + if (src == NULL) { + *pdest = NULL; + return 0; + } + + return transport_params_copy_new(pdest, src, mem); +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_crypto.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_crypto.h new file mode 100644 index 0000000..3b91ce9 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_crypto.h @@ -0,0 +1,148 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_CRYPTO_H +#define NGTCP2_CRYPTO_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +#include "ngtcp2_mem.h" + +/* NGTCP2_INITIAL_AEAD_OVERHEAD is an overhead of AEAD used by Initial + packets. Because QUIC uses AEAD_AES_128_GCM, the overhead is 16 + bytes. */ +#define NGTCP2_INITIAL_AEAD_OVERHEAD 16 + +/* NGTCP2_MAX_AEAD_OVERHEAD is expected maximum AEAD overhead. */ +#define NGTCP2_MAX_AEAD_OVERHEAD 16 + +/* ngtcp2_transport_param_id is the registry of QUIC transport + parameter ID. */ +typedef enum ngtcp2_transport_param_id { + NGTCP2_TRANSPORT_PARAM_ORIGINAL_DESTINATION_CONNECTION_ID = 0x0000, + NGTCP2_TRANSPORT_PARAM_MAX_IDLE_TIMEOUT = 0x0001, + NGTCP2_TRANSPORT_PARAM_STATELESS_RESET_TOKEN = 0x0002, + NGTCP2_TRANSPORT_PARAM_MAX_UDP_PAYLOAD_SIZE = 0x0003, + NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_DATA = 0x0004, + NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_BIDI_LOCAL = 0x0005, + NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_BIDI_REMOTE = 0x0006, + NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_UNI = 0x0007, + NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAMS_BIDI = 0x0008, + NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAMS_UNI = 0x0009, + NGTCP2_TRANSPORT_PARAM_ACK_DELAY_EXPONENT = 0x000a, + NGTCP2_TRANSPORT_PARAM_MAX_ACK_DELAY = 0x000b, + NGTCP2_TRANSPORT_PARAM_DISABLE_ACTIVE_MIGRATION = 0x000c, + NGTCP2_TRANSPORT_PARAM_PREFERRED_ADDRESS = 0x000d, + NGTCP2_TRANSPORT_PARAM_ACTIVE_CONNECTION_ID_LIMIT = 0x000e, + NGTCP2_TRANSPORT_PARAM_INITIAL_SOURCE_CONNECTION_ID = 0x000f, + NGTCP2_TRANSPORT_PARAM_RETRY_SOURCE_CONNECTION_ID = 0x0010, + /* https://datatracker.ietf.org/doc/html/rfc9221 */ + NGTCP2_TRANSPORT_PARAM_MAX_DATAGRAM_FRAME_SIZE = 0x0020, + NGTCP2_TRANSPORT_PARAM_GREASE_QUIC_BIT = 0x2ab2, + /* https://datatracker.ietf.org/doc/html/draft-ietf-quic-version-negotiation-14 + */ + NGTCP2_TRANSPORT_PARAM_VERSION_INFORMATION = 0x11, +} ngtcp2_transport_param_id; + +/* NGTCP2_CRYPTO_KM_FLAG_NONE indicates that no flag is set. */ +#define NGTCP2_CRYPTO_KM_FLAG_NONE 0x00u +/* NGTCP2_CRYPTO_KM_FLAG_KEY_PHASE_ONE is set if key phase bit is + set. */ +#define NGTCP2_CRYPTO_KM_FLAG_KEY_PHASE_ONE 0x01u + +typedef struct ngtcp2_crypto_km { + ngtcp2_vec secret; + ngtcp2_crypto_aead_ctx aead_ctx; + ngtcp2_vec iv; + /* pkt_num is a packet number of a packet which uses this keying + material. For encryption key, it is the lowest packet number of + a packet. For decryption key, it is the lowest packet number of + a packet which can be decrypted with this keying material. */ + int64_t pkt_num; + /* use_count is the number of encryption applied with this key. + This field is only used for tx key. */ + uint64_t use_count; + /* flags is the bitwise OR of zero or more of + NGTCP2_CRYPTO_KM_FLAG_*. */ + uint8_t flags; +} ngtcp2_crypto_km; + +/* + * ngtcp2_crypto_km_new creates new ngtcp2_crypto_km object and + * assigns its pointer to |*pckm|. The |secret| of length + * |secretlen|, the |key| of length |keylen| and the |iv| of length + * |ivlen| are copied to |*pckm|. If |secretlen| == 0, the function + * assumes no secret is given which is acceptable. The sole reason to + * store secret is update keys. Only 1RTT key can be updated. + */ +int ngtcp2_crypto_km_new(ngtcp2_crypto_km **pckm, const uint8_t *secret, + size_t secretlen, + const ngtcp2_crypto_aead_ctx *aead_ctx, + const uint8_t *iv, size_t ivlen, + const ngtcp2_mem *mem); + +/* + * ngtcp2_crypto_km_nocopy_new is similar to ngtcp2_crypto_km_new, but + * it does not copy secret, key and IV. + */ +int ngtcp2_crypto_km_nocopy_new(ngtcp2_crypto_km **pckm, size_t secretlen, + size_t ivlen, const ngtcp2_mem *mem); + +void ngtcp2_crypto_km_del(ngtcp2_crypto_km *ckm, const ngtcp2_mem *mem); + +typedef struct ngtcp2_crypto_cc { + ngtcp2_crypto_aead aead; + ngtcp2_crypto_cipher hp; + ngtcp2_crypto_km *ckm; + ngtcp2_crypto_cipher_ctx hp_ctx; + ngtcp2_encrypt encrypt; + ngtcp2_decrypt decrypt; + ngtcp2_hp_mask hp_mask; +} ngtcp2_crypto_cc; + +void ngtcp2_crypto_create_nonce(uint8_t *dest, const uint8_t *iv, size_t ivlen, + int64_t pkt_num); + +/* + * ngtcp2_transport_params_copy_new makes a copy of |src|, and assigns + * it to |*pdest|. If |src| is NULL, NULL is assigned to |*pdest|. + * + * Caller is responsible to call ngtcp2_transport_params_del to free + * the memory assigned to |*pdest|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +int ngtcp2_transport_params_copy_new(ngtcp2_transport_params **pdest, + const ngtcp2_transport_params *src, + const ngtcp2_mem *mem); + +#endif /* NGTCP2_CRYPTO_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_err.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_err.c new file mode 100644 index 0000000..ab932bd --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_err.c @@ -0,0 +1,154 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_err.h" + +const char *ngtcp2_strerror(int liberr) { + switch (liberr) { + case 0: + return "NO_ERROR"; + case NGTCP2_ERR_INVALID_ARGUMENT: + return "ERR_INVALID_ARGUMENT"; + case NGTCP2_ERR_NOBUF: + return "ERR_NOBUF"; + case NGTCP2_ERR_PROTO: + return "ERR_PROTO"; + case NGTCP2_ERR_INVALID_STATE: + return "ERR_INVALID_STATE"; + case NGTCP2_ERR_ACK_FRAME: + return "ERR_ACK_FRAME"; + case NGTCP2_ERR_STREAM_ID_BLOCKED: + return "ERR_STREAM_ID_BLOCKED"; + case NGTCP2_ERR_STREAM_IN_USE: + return "ERR_STREAM_IN_USE"; + case NGTCP2_ERR_STREAM_DATA_BLOCKED: + return "ERR_STREAM_DATA_BLOCKED"; + case NGTCP2_ERR_FLOW_CONTROL: + return "ERR_FLOW_CONTROL"; + case NGTCP2_ERR_CONNECTION_ID_LIMIT: + return "ERR_CONNECTION_ID_LIMIT"; + case NGTCP2_ERR_STREAM_LIMIT: + return "ERR_STREAM_LIMIT"; + case NGTCP2_ERR_FINAL_SIZE: + return "ERR_FINAL_SIZE"; + case NGTCP2_ERR_CRYPTO: + return "ERR_CRYPTO"; + case NGTCP2_ERR_PKT_NUM_EXHAUSTED: + return "ERR_PKT_NUM_EXHAUSTED"; + case NGTCP2_ERR_NOMEM: + return "ERR_NOMEM"; + case NGTCP2_ERR_REQUIRED_TRANSPORT_PARAM: + return "ERR_REQUIRED_TRANSPORT_PARAM"; + case NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM: + return "ERR_MALFORMED_TRANSPORT_PARAM"; + case NGTCP2_ERR_FRAME_ENCODING: + return "ERR_FRAME_ENCODING"; + case NGTCP2_ERR_DECRYPT: + return "ERR_DECRYPT"; + case NGTCP2_ERR_STREAM_SHUT_WR: + return "ERR_STREAM_SHUT_WR"; + case NGTCP2_ERR_STREAM_NOT_FOUND: + return "ERR_STREAM_NOT_FOUND"; + case NGTCP2_ERR_STREAM_STATE: + return "ERR_STREAM_STATE"; + case NGTCP2_ERR_RECV_VERSION_NEGOTIATION: + return "ERR_RECV_VERSION_NEGOTIATION"; + case NGTCP2_ERR_CLOSING: + return "ERR_CLOSING"; + case NGTCP2_ERR_DRAINING: + return "ERR_DRAINING"; + case NGTCP2_ERR_TRANSPORT_PARAM: + return "ERR_TRANSPORT_PARAM"; + case NGTCP2_ERR_DISCARD_PKT: + return "ERR_DISCARD_PKT"; + case NGTCP2_ERR_CONN_ID_BLOCKED: + return "ERR_CONN_ID_BLOCKED"; + case NGTCP2_ERR_CALLBACK_FAILURE: + return "ERR_CALLBACK_FAILURE"; + case NGTCP2_ERR_INTERNAL: + return "ERR_INTERNAL"; + case NGTCP2_ERR_CRYPTO_BUFFER_EXCEEDED: + return "ERR_CRYPTO_BUFFER_EXCEEDED"; + case NGTCP2_ERR_WRITE_MORE: + return "ERR_WRITE_MORE"; + case NGTCP2_ERR_RETRY: + return "ERR_RETRY"; + case NGTCP2_ERR_DROP_CONN: + return "ERR_DROP_CONN"; + case NGTCP2_ERR_AEAD_LIMIT_REACHED: + return "ERR_AEAD_LIMIT_REACHED"; + case NGTCP2_ERR_NO_VIABLE_PATH: + return "ERR_NO_VIABLE_PATH"; + case NGTCP2_ERR_VERSION_NEGOTIATION: + return "ERR_VERSION_NEGOTIATION"; + case NGTCP2_ERR_HANDSHAKE_TIMEOUT: + return "ERR_HANDSHAKE_TIMEOUT"; + case NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE: + return "ERR_VERSION_NEGOTIATION_FAILURE"; + case NGTCP2_ERR_IDLE_CLOSE: + return "ERR_IDLE_CLOSE"; + default: + return "(unknown)"; + } +} + +int ngtcp2_err_is_fatal(int liberr) { return liberr < NGTCP2_ERR_FATAL; } + +uint64_t ngtcp2_err_infer_quic_transport_error_code(int liberr) { + switch (liberr) { + case 0: + return NGTCP2_NO_ERROR; + case NGTCP2_ERR_ACK_FRAME: + case NGTCP2_ERR_FRAME_ENCODING: + return NGTCP2_FRAME_ENCODING_ERROR; + case NGTCP2_ERR_FLOW_CONTROL: + return NGTCP2_FLOW_CONTROL_ERROR; + case NGTCP2_ERR_CONNECTION_ID_LIMIT: + return NGTCP2_CONNECTION_ID_LIMIT_ERROR; + case NGTCP2_ERR_STREAM_LIMIT: + return NGTCP2_STREAM_LIMIT_ERROR; + case NGTCP2_ERR_FINAL_SIZE: + return NGTCP2_FINAL_SIZE_ERROR; + case NGTCP2_ERR_REQUIRED_TRANSPORT_PARAM: + case NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM: + case NGTCP2_ERR_TRANSPORT_PARAM: + return NGTCP2_TRANSPORT_PARAMETER_ERROR; + case NGTCP2_ERR_INVALID_ARGUMENT: + case NGTCP2_ERR_NOMEM: + case NGTCP2_ERR_CALLBACK_FAILURE: + return NGTCP2_INTERNAL_ERROR; + case NGTCP2_ERR_STREAM_STATE: + return NGTCP2_STREAM_STATE_ERROR; + case NGTCP2_ERR_CRYPTO_BUFFER_EXCEEDED: + return NGTCP2_CRYPTO_BUFFER_EXCEEDED; + case NGTCP2_ERR_AEAD_LIMIT_REACHED: + return NGTCP2_AEAD_LIMIT_REACHED; + case NGTCP2_ERR_NO_VIABLE_PATH: + return NGTCP2_NO_VIABLE_PATH; + case NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE: + return NGTCP2_VERSION_NEGOTIATION_ERROR; + default: + return NGTCP2_PROTOCOL_VIOLATION; + } +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_err.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_err.h new file mode 100644 index 0000000..9229f54 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_err.h @@ -0,0 +1,34 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_ERR_H +#define NGTCP2_ERR_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +#endif /* NGTCP2_ERR_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_gaptr.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_gaptr.c new file mode 100644 index 0000000..87c2389 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_gaptr.c @@ -0,0 +1,167 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_gaptr.h" + +#include <string.h> +#include <assert.h> + +void ngtcp2_gaptr_init(ngtcp2_gaptr *gaptr, const ngtcp2_mem *mem) { + ngtcp2_ksl_init(&gaptr->gap, ngtcp2_ksl_range_compar, sizeof(ngtcp2_range), + mem); + + gaptr->mem = mem; +} + +static int gaptr_gap_init(ngtcp2_gaptr *gaptr) { + ngtcp2_range range = {0, UINT64_MAX}; + int rv; + + rv = ngtcp2_ksl_insert(&gaptr->gap, NULL, &range, NULL); + if (rv != 0) { + return rv; + } + + return 0; +} + +void ngtcp2_gaptr_free(ngtcp2_gaptr *gaptr) { + if (gaptr == NULL) { + return; + } + + ngtcp2_ksl_free(&gaptr->gap); +} + +int ngtcp2_gaptr_push(ngtcp2_gaptr *gaptr, uint64_t offset, uint64_t datalen) { + int rv; + ngtcp2_range k, m, l, r, q = {offset, offset + datalen}; + ngtcp2_ksl_it it; + + if (ngtcp2_ksl_len(&gaptr->gap) == 0) { + rv = gaptr_gap_init(gaptr); + if (rv != 0) { + return rv; + } + } + + it = ngtcp2_ksl_lower_bound_compar(&gaptr->gap, &q, + ngtcp2_ksl_range_exclusive_compar); + + for (; !ngtcp2_ksl_it_end(&it);) { + k = *(ngtcp2_range *)ngtcp2_ksl_it_key(&it); + m = ngtcp2_range_intersect(&q, &k); + if (!ngtcp2_range_len(&m)) { + break; + } + + if (ngtcp2_range_eq(&k, &m)) { + ngtcp2_ksl_remove_hint(&gaptr->gap, &it, &it, &k); + continue; + } + ngtcp2_range_cut(&l, &r, &k, &m); + if (ngtcp2_range_len(&l)) { + ngtcp2_ksl_update_key(&gaptr->gap, &k, &l); + + if (ngtcp2_range_len(&r)) { + rv = ngtcp2_ksl_insert(&gaptr->gap, &it, &r, NULL); + if (rv != 0) { + return rv; + } + } + } else if (ngtcp2_range_len(&r)) { + ngtcp2_ksl_update_key(&gaptr->gap, &k, &r); + } + ngtcp2_ksl_it_next(&it); + } + return 0; +} + +uint64_t ngtcp2_gaptr_first_gap_offset(ngtcp2_gaptr *gaptr) { + ngtcp2_ksl_it it; + ngtcp2_range r; + + if (ngtcp2_ksl_len(&gaptr->gap) == 0) { + return 0; + } + + it = ngtcp2_ksl_begin(&gaptr->gap); + r = *(ngtcp2_range *)ngtcp2_ksl_it_key(&it); + + return r.begin; +} + +ngtcp2_range ngtcp2_gaptr_get_first_gap_after(ngtcp2_gaptr *gaptr, + uint64_t offset) { + ngtcp2_range q = {offset, offset + 1}; + ngtcp2_ksl_it it; + + if (ngtcp2_ksl_len(&gaptr->gap) == 0) { + ngtcp2_range r = {0, UINT64_MAX}; + return r; + } + + it = ngtcp2_ksl_lower_bound_compar(&gaptr->gap, &q, + ngtcp2_ksl_range_exclusive_compar); + + assert(!ngtcp2_ksl_it_end(&it)); + + return *(ngtcp2_range *)ngtcp2_ksl_it_key(&it); +} + +int ngtcp2_gaptr_is_pushed(ngtcp2_gaptr *gaptr, uint64_t offset, + uint64_t datalen) { + ngtcp2_range q = {offset, offset + datalen}; + ngtcp2_ksl_it it; + ngtcp2_range k; + ngtcp2_range m; + + if (ngtcp2_ksl_len(&gaptr->gap) == 0) { + return 0; + } + + it = ngtcp2_ksl_lower_bound_compar(&gaptr->gap, &q, + ngtcp2_ksl_range_exclusive_compar); + k = *(ngtcp2_range *)ngtcp2_ksl_it_key(&it); + m = ngtcp2_range_intersect(&q, &k); + + return ngtcp2_range_len(&m) == 0; +} + +void ngtcp2_gaptr_drop_first_gap(ngtcp2_gaptr *gaptr) { + ngtcp2_ksl_it it; + ngtcp2_range r; + + if (ngtcp2_ksl_len(&gaptr->gap) == 0) { + return; + } + + it = ngtcp2_ksl_begin(&gaptr->gap); + + assert(!ngtcp2_ksl_it_end(&it)); + + r = *(ngtcp2_range *)ngtcp2_ksl_it_key(&it); + + ngtcp2_ksl_remove_hint(&gaptr->gap, NULL, &it, &r); +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_gaptr.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_gaptr.h new file mode 100644 index 0000000..0f100a8 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_gaptr.h @@ -0,0 +1,98 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_GAPTR_H +#define NGTCP2_GAPTR_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +#include "ngtcp2_mem.h" +#include "ngtcp2_ksl.h" +#include "ngtcp2_range.h" + +/* + * ngtcp2_gaptr maintains the gap in the range [0, UINT64_MAX). + */ +typedef struct ngtcp2_gaptr { + /* gap maintains the range of offset which is not received + yet. Initially, its range is [0, UINT64_MAX). */ + ngtcp2_ksl gap; + /* mem is custom memory allocator */ + const ngtcp2_mem *mem; +} ngtcp2_gaptr; + +/* + * ngtcp2_gaptr_init initializes |gaptr|. + */ +void ngtcp2_gaptr_init(ngtcp2_gaptr *gaptr, const ngtcp2_mem *mem); + +/* + * ngtcp2_gaptr_free frees resources allocated for |gaptr|. + */ +void ngtcp2_gaptr_free(ngtcp2_gaptr *gaptr); + +/* + * ngtcp2_gaptr_push adds new data of length |datalen| at the stream + * offset |offset|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory + */ +int ngtcp2_gaptr_push(ngtcp2_gaptr *gaptr, uint64_t offset, uint64_t datalen); + +/* + * ngtcp2_gaptr_first_gap_offset returns the offset to the first gap. + * If there is no gap, it returns UINT64_MAX. + */ +uint64_t ngtcp2_gaptr_first_gap_offset(ngtcp2_gaptr *gaptr); + +/* + * ngtcp2_gaptr_get_first_gap_after returns the first gap which + * overlaps or comes after |offset|. + */ +ngtcp2_range ngtcp2_gaptr_get_first_gap_after(ngtcp2_gaptr *gaptr, + uint64_t offset); + +/* + * ngtcp2_gaptr_is_pushed returns nonzero if range [offset, offset + + * datalen) is completely pushed into this object. + */ +int ngtcp2_gaptr_is_pushed(ngtcp2_gaptr *gaptr, uint64_t offset, + uint64_t datalen); + +/* + * ngtcp2_gaptr_drop_first_gap deletes the first gap entirely as if + * the range is pushed. This function assumes that at least one gap + * exists. + */ +void ngtcp2_gaptr_drop_first_gap(ngtcp2_gaptr *gaptr); + +#endif /* NGTCP2_GAPTR_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_idtr.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_idtr.c new file mode 100644 index 0000000..d988022 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_idtr.c @@ -0,0 +1,79 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_idtr.h" + +#include <assert.h> + +void ngtcp2_idtr_init(ngtcp2_idtr *idtr, int server, const ngtcp2_mem *mem) { + ngtcp2_gaptr_init(&idtr->gap, mem); + + idtr->server = server; +} + +void ngtcp2_idtr_free(ngtcp2_idtr *idtr) { + if (idtr == NULL) { + return; + } + + ngtcp2_gaptr_free(&idtr->gap); +} + +/* + * id_from_stream_id translates |stream_id| to id space used by + * ngtcp2_idtr. + */ +static uint64_t id_from_stream_id(int64_t stream_id) { + return (uint64_t)(stream_id >> 2); +} + +int ngtcp2_idtr_open(ngtcp2_idtr *idtr, int64_t stream_id) { + uint64_t q; + + assert((idtr->server && (stream_id % 2)) || + (!idtr->server && (stream_id % 2)) == 0); + + q = id_from_stream_id(stream_id); + + if (ngtcp2_gaptr_is_pushed(&idtr->gap, q, 1)) { + return NGTCP2_ERR_STREAM_IN_USE; + } + + return ngtcp2_gaptr_push(&idtr->gap, q, 1); +} + +int ngtcp2_idtr_is_open(ngtcp2_idtr *idtr, int64_t stream_id) { + uint64_t q; + + assert((idtr->server && (stream_id % 2)) || + (!idtr->server && (stream_id % 2)) == 0); + + q = id_from_stream_id(stream_id); + + return ngtcp2_gaptr_is_pushed(&idtr->gap, q, 1); +} + +uint64_t ngtcp2_idtr_first_gap(ngtcp2_idtr *idtr) { + return ngtcp2_gaptr_first_gap_offset(&idtr->gap); +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_idtr.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_idtr.h new file mode 100644 index 0000000..edb8c68 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_idtr.h @@ -0,0 +1,89 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_IDTR_H +#define NGTCP2_IDTR_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +#include "ngtcp2_mem.h" +#include "ngtcp2_gaptr.h" + +/* + * ngtcp2_idtr tracks the usage of stream ID. + */ +typedef struct ngtcp2_idtr { + /* gap maintains the range of ID which is not used yet. Initially, + its range is [0, UINT64_MAX). */ + ngtcp2_gaptr gap; + /* server is nonzero if this object records server initiated stream + ID. */ + int server; +} ngtcp2_idtr; + +/* + * ngtcp2_idtr_init initializes |idtr|. + * + * If this object records server initiated ID (even number), set + * |server| to nonzero. + */ +void ngtcp2_idtr_init(ngtcp2_idtr *idtr, int server, const ngtcp2_mem *mem); + +/* + * ngtcp2_idtr_free frees resources allocated for |idtr|. + */ +void ngtcp2_idtr_free(ngtcp2_idtr *idtr); + +/* + * ngtcp2_idtr_open claims that |stream_id| is in used. + * + * It returns 0 if it succeeds, or one of the following negative error + * codes: + * + * NGTCP2_ERR_STREAM_IN_USE + * ID has already been used. + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +int ngtcp2_idtr_open(ngtcp2_idtr *idtr, int64_t stream_id); + +/* + * ngtcp2_idtr_open tells whether ID |stream_id| is in used or not. + * + * It returns nonzero if |stream_id| is used. + */ +int ngtcp2_idtr_is_open(ngtcp2_idtr *idtr, int64_t stream_id); + +/* + * ngtcp2_idtr_first_gap returns the first id of first gap. If there + * is no gap, it returns UINT64_MAX. The returned id is an id space + * used in this object internally, and not stream ID. + */ +uint64_t ngtcp2_idtr_first_gap(ngtcp2_idtr *idtr); + +#endif /* NGTCP2_IDTR_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ksl.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ksl.c new file mode 100644 index 0000000..0bd424c --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ksl.c @@ -0,0 +1,819 @@ +/* + * ngtcp2 + * + * Copyright (c) 2018 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_ksl.h" + +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <stdio.h> + +#include "ngtcp2_macro.h" +#include "ngtcp2_mem.h" +#include "ngtcp2_range.h" + +static ngtcp2_ksl_blk null_blk = {{{NULL, NULL, 0, 0, {0}}}}; + +static size_t ksl_nodelen(size_t keylen) { + return (sizeof(ngtcp2_ksl_node) + keylen - sizeof(uint64_t) + 0xfu) & + ~(uintptr_t)0xfu; +} + +static size_t ksl_blklen(size_t nodelen) { + return sizeof(ngtcp2_ksl_blk) + nodelen * NGTCP2_KSL_MAX_NBLK - + sizeof(uint64_t); +} + +/* + * ksl_node_set_key sets |key| to |node|. + */ +static void ksl_node_set_key(ngtcp2_ksl *ksl, ngtcp2_ksl_node *node, + const void *key) { + memcpy(node->key, key, ksl->keylen); +} + +void ngtcp2_ksl_init(ngtcp2_ksl *ksl, ngtcp2_ksl_compar compar, size_t keylen, + const ngtcp2_mem *mem) { + size_t nodelen = ksl_nodelen(keylen); + + ngtcp2_objalloc_init(&ksl->blkalloc, + ((ksl_blklen(nodelen) + 0xfu) & ~(uintptr_t)0xfu) * 8, + mem); + + ksl->head = NULL; + ksl->front = ksl->back = NULL; + ksl->compar = compar; + ksl->keylen = keylen; + ksl->nodelen = nodelen; + ksl->n = 0; +} + +static ngtcp2_ksl_blk *ksl_blk_objalloc_new(ngtcp2_ksl *ksl) { + return ngtcp2_objalloc_ksl_blk_len_get(&ksl->blkalloc, + ksl_blklen(ksl->nodelen)); +} + +static void ksl_blk_objalloc_del(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk) { + ngtcp2_objalloc_ksl_blk_release(&ksl->blkalloc, blk); +} + +static int ksl_head_init(ngtcp2_ksl *ksl) { + ngtcp2_ksl_blk *head = ksl_blk_objalloc_new(ksl); + if (!head) { + return NGTCP2_ERR_NOMEM; + } + + head->next = head->prev = NULL; + head->n = 0; + head->leaf = 1; + + ksl->head = head; + ksl->front = ksl->back = head; + + return 0; +} + +#ifdef NOMEMPOOL +/* + * ksl_free_blk frees |blk| recursively. + */ +static void ksl_free_blk(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk) { + size_t i; + + if (!blk->leaf) { + for (i = 0; i < blk->n; ++i) { + ksl_free_blk(ksl, ngtcp2_ksl_nth_node(ksl, blk, i)->blk); + } + } + + ksl_blk_objalloc_del(ksl, blk); +} +#endif /* NOMEMPOOL */ + +void ngtcp2_ksl_free(ngtcp2_ksl *ksl) { + if (!ksl || !ksl->head) { + return; + } + +#ifdef NOMEMPOOL + ksl_free_blk(ksl, ksl->head); +#endif /* NOMEMPOOL */ + + ngtcp2_objalloc_free(&ksl->blkalloc); +} + +/* + * ksl_split_blk splits |blk| into 2 ngtcp2_ksl_blk objects. The new + * ngtcp2_ksl_blk is always the "right" block. + * + * It returns the pointer to the ngtcp2_ksl_blk created which is the + * located at the right of |blk|, or NULL which indicates out of + * memory error. + */ +static ngtcp2_ksl_blk *ksl_split_blk(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk) { + ngtcp2_ksl_blk *rblk; + + rblk = ksl_blk_objalloc_new(ksl); + if (rblk == NULL) { + return NULL; + } + + rblk->next = blk->next; + blk->next = rblk; + if (rblk->next) { + rblk->next->prev = rblk; + } else if (ksl->back == blk) { + ksl->back = rblk; + } + rblk->prev = blk; + rblk->leaf = blk->leaf; + + rblk->n = blk->n / 2; + + memcpy(rblk->nodes, blk->nodes + ksl->nodelen * (blk->n - rblk->n), + ksl->nodelen * rblk->n); + + blk->n -= rblk->n; + + assert(blk->n >= NGTCP2_KSL_MIN_NBLK); + assert(rblk->n >= NGTCP2_KSL_MIN_NBLK); + + return rblk; +} + +/* + * ksl_split_node splits a node included in |blk| at the position |i| + * into 2 adjacent nodes. The new node is always inserted at the + * position |i+1|. + * + * It returns 0 if it succeeds, or one of the following negative error + * codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +static int ksl_split_node(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, size_t i) { + ngtcp2_ksl_node *node; + ngtcp2_ksl_blk *lblk = ngtcp2_ksl_nth_node(ksl, blk, i)->blk, *rblk; + + rblk = ksl_split_blk(ksl, lblk); + if (rblk == NULL) { + return NGTCP2_ERR_NOMEM; + } + + memmove(blk->nodes + (i + 2) * ksl->nodelen, + blk->nodes + (i + 1) * ksl->nodelen, + ksl->nodelen * (blk->n - (i + 1))); + + node = ngtcp2_ksl_nth_node(ksl, blk, i + 1); + node->blk = rblk; + ++blk->n; + ksl_node_set_key(ksl, node, ngtcp2_ksl_nth_node(ksl, rblk, rblk->n - 1)->key); + + node = ngtcp2_ksl_nth_node(ksl, blk, i); + ksl_node_set_key(ksl, node, ngtcp2_ksl_nth_node(ksl, lblk, lblk->n - 1)->key); + + return 0; +} + +/* + * ksl_split_head splits a head (root) block. It increases the height + * of skip list by 1. + * + * It returns 0 if it succeeds, or one of the following negative error + * codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +static int ksl_split_head(ngtcp2_ksl *ksl) { + ngtcp2_ksl_blk *rblk = NULL, *lblk, *nhead = NULL; + ngtcp2_ksl_node *node; + + rblk = ksl_split_blk(ksl, ksl->head); + if (rblk == NULL) { + return NGTCP2_ERR_NOMEM; + } + + lblk = ksl->head; + + nhead = ksl_blk_objalloc_new(ksl); + if (nhead == NULL) { + ksl_blk_objalloc_del(ksl, rblk); + return NGTCP2_ERR_NOMEM; + } + nhead->next = nhead->prev = NULL; + nhead->n = 2; + nhead->leaf = 0; + + node = ngtcp2_ksl_nth_node(ksl, nhead, 0); + ksl_node_set_key(ksl, node, ngtcp2_ksl_nth_node(ksl, lblk, lblk->n - 1)->key); + node->blk = lblk; + + node = ngtcp2_ksl_nth_node(ksl, nhead, 1); + ksl_node_set_key(ksl, node, ngtcp2_ksl_nth_node(ksl, rblk, rblk->n - 1)->key); + node->blk = rblk; + + ksl->head = nhead; + + return 0; +} + +/* + * insert_node inserts a node whose key is |key| with the associated + * |data| at the index of |i|. This function assumes that the number + * of nodes contained by |blk| is strictly less than + * NGTCP2_KSL_MAX_NBLK. + */ +static void ksl_insert_node(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, size_t i, + const ngtcp2_ksl_key *key, void *data) { + ngtcp2_ksl_node *node; + + assert(blk->n < NGTCP2_KSL_MAX_NBLK); + + memmove(blk->nodes + (i + 1) * ksl->nodelen, blk->nodes + i * ksl->nodelen, + ksl->nodelen * (blk->n - i)); + + node = ngtcp2_ksl_nth_node(ksl, blk, i); + ksl_node_set_key(ksl, node, key); + node->data = data; + + ++blk->n; +} + +static size_t ksl_bsearch(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, + const ngtcp2_ksl_key *key, ngtcp2_ksl_compar compar) { + size_t i; + ngtcp2_ksl_node *node; + + for (i = 0, node = (ngtcp2_ksl_node *)(void *)blk->nodes; + i < blk->n && compar((ngtcp2_ksl_key *)node->key, key); + ++i, node = (ngtcp2_ksl_node *)(void *)((uint8_t *)node + ksl->nodelen)) + ; + + return i; +} + +int ngtcp2_ksl_insert(ngtcp2_ksl *ksl, ngtcp2_ksl_it *it, + const ngtcp2_ksl_key *key, void *data) { + ngtcp2_ksl_blk *blk; + ngtcp2_ksl_node *node; + size_t i; + int rv; + + if (!ksl->head) { + rv = ksl_head_init(ksl); + if (rv != 0) { + return rv; + } + } + + blk = ksl->head; + + if (blk->n == NGTCP2_KSL_MAX_NBLK) { + rv = ksl_split_head(ksl); + if (rv != 0) { + return rv; + } + blk = ksl->head; + } + + for (;;) { + i = ksl_bsearch(ksl, blk, key, ksl->compar); + + if (blk->leaf) { + if (i < blk->n && + !ksl->compar(key, ngtcp2_ksl_nth_node(ksl, blk, i)->key)) { + if (it) { + *it = ngtcp2_ksl_end(ksl); + } + return NGTCP2_ERR_INVALID_ARGUMENT; + } + ksl_insert_node(ksl, blk, i, key, data); + ++ksl->n; + if (it) { + ngtcp2_ksl_it_init(it, ksl, blk, i); + } + return 0; + } + + if (i == blk->n) { + /* This insertion extends the largest key in this subtree. */ + for (; !blk->leaf;) { + node = ngtcp2_ksl_nth_node(ksl, blk, blk->n - 1); + if (node->blk->n == NGTCP2_KSL_MAX_NBLK) { + rv = ksl_split_node(ksl, blk, blk->n - 1); + if (rv != 0) { + return rv; + } + node = ngtcp2_ksl_nth_node(ksl, blk, blk->n - 1); + } + ksl_node_set_key(ksl, node, key); + blk = node->blk; + } + ksl_insert_node(ksl, blk, blk->n, key, data); + ++ksl->n; + if (it) { + ngtcp2_ksl_it_init(it, ksl, blk, blk->n - 1); + } + return 0; + } + + node = ngtcp2_ksl_nth_node(ksl, blk, i); + + if (node->blk->n == NGTCP2_KSL_MAX_NBLK) { + rv = ksl_split_node(ksl, blk, i); + if (rv != 0) { + return rv; + } + if (ksl->compar((ngtcp2_ksl_key *)node->key, key)) { + node = ngtcp2_ksl_nth_node(ksl, blk, i + 1); + if (ksl->compar((ngtcp2_ksl_key *)node->key, key)) { + ksl_node_set_key(ksl, node, key); + } + } + } + + blk = node->blk; + } +} + +/* + * ksl_remove_node removes the node included in |blk| at the index of + * |i|. + */ +static void ksl_remove_node(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, size_t i) { + memmove(blk->nodes + i * ksl->nodelen, blk->nodes + (i + 1) * ksl->nodelen, + ksl->nodelen * (blk->n - (i + 1))); + + --blk->n; +} + +/* + * ksl_merge_node merges 2 nodes which are the nodes at the index of + * |i| and |i + 1|. + * + * If |blk| is the direct descendant of head (root) block and the head + * block contains just 2 nodes, the merged block becomes head block, + * which decreases the height of |ksl| by 1. + * + * This function returns the pointer to the merged block. + */ +static ngtcp2_ksl_blk *ksl_merge_node(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, + size_t i) { + ngtcp2_ksl_blk *lblk, *rblk; + + assert(i + 1 < blk->n); + + lblk = ngtcp2_ksl_nth_node(ksl, blk, i)->blk; + rblk = ngtcp2_ksl_nth_node(ksl, blk, i + 1)->blk; + + assert(lblk->n + rblk->n < NGTCP2_KSL_MAX_NBLK); + + memcpy(lblk->nodes + ksl->nodelen * lblk->n, rblk->nodes, + ksl->nodelen * rblk->n); + + lblk->n += rblk->n; + lblk->next = rblk->next; + if (lblk->next) { + lblk->next->prev = lblk; + } else if (ksl->back == rblk) { + ksl->back = lblk; + } + + ksl_blk_objalloc_del(ksl, rblk); + + if (ksl->head == blk && blk->n == 2) { + ksl_blk_objalloc_del(ksl, ksl->head); + ksl->head = lblk; + } else { + ksl_remove_node(ksl, blk, i + 1); + ksl_node_set_key(ksl, ngtcp2_ksl_nth_node(ksl, blk, i), + ngtcp2_ksl_nth_node(ksl, lblk, lblk->n - 1)->key); + } + + return lblk; +} + +/* + * ksl_shift_left moves the first nodes in blk->nodes[i]->blk->nodes + * to blk->nodes[i - 1]->blk->nodes in a manner that they have the + * same amount of nodes as much as possible. + */ +static void ksl_shift_left(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, size_t i) { + ngtcp2_ksl_node *lnode, *rnode; + size_t n; + + assert(i > 0); + + lnode = ngtcp2_ksl_nth_node(ksl, blk, i - 1); + rnode = ngtcp2_ksl_nth_node(ksl, blk, i); + + assert(lnode->blk->n < NGTCP2_KSL_MAX_NBLK); + assert(rnode->blk->n > NGTCP2_KSL_MIN_NBLK); + + n = (lnode->blk->n + rnode->blk->n + 1) / 2 - lnode->blk->n; + + assert(n > 0); + assert(lnode->blk->n <= NGTCP2_KSL_MAX_NBLK - n); + assert(rnode->blk->n >= NGTCP2_KSL_MIN_NBLK + n); + + memcpy(lnode->blk->nodes + ksl->nodelen * lnode->blk->n, rnode->blk->nodes, + ksl->nodelen * n); + + lnode->blk->n += (uint32_t)n; + rnode->blk->n -= (uint32_t)n; + + ksl_node_set_key( + ksl, lnode, ngtcp2_ksl_nth_node(ksl, lnode->blk, lnode->blk->n - 1)->key); + + memmove(rnode->blk->nodes, rnode->blk->nodes + ksl->nodelen * n, + ksl->nodelen * rnode->blk->n); +} + +/* + * ksl_shift_right moves the last nodes in blk->nodes[i]->blk->nodes + * to blk->nodes[i + 1]->blk->nodes in a manner that they have the + * same amount of nodes as much as possible.. + */ +static void ksl_shift_right(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, size_t i) { + ngtcp2_ksl_node *lnode, *rnode; + size_t n; + + assert(i < blk->n - 1); + + lnode = ngtcp2_ksl_nth_node(ksl, blk, i); + rnode = ngtcp2_ksl_nth_node(ksl, blk, i + 1); + + assert(lnode->blk->n > NGTCP2_KSL_MIN_NBLK); + assert(rnode->blk->n < NGTCP2_KSL_MAX_NBLK); + + n = (lnode->blk->n + rnode->blk->n + 1) / 2 - rnode->blk->n; + + assert(n > 0); + assert(lnode->blk->n >= NGTCP2_KSL_MIN_NBLK + n); + assert(rnode->blk->n <= NGTCP2_KSL_MAX_NBLK - n); + + memmove(rnode->blk->nodes + ksl->nodelen * n, rnode->blk->nodes, + ksl->nodelen * rnode->blk->n); + + rnode->blk->n += (uint32_t)n; + lnode->blk->n -= (uint32_t)n; + + memcpy(rnode->blk->nodes, lnode->blk->nodes + ksl->nodelen * lnode->blk->n, + ksl->nodelen * n); + + ksl_node_set_key( + ksl, lnode, ngtcp2_ksl_nth_node(ksl, lnode->blk, lnode->blk->n - 1)->key); +} + +/* + * key_equal returns nonzero if |lhs| and |rhs| are equal using the + * function |compar|. + */ +static int key_equal(ngtcp2_ksl_compar compar, const ngtcp2_ksl_key *lhs, + const ngtcp2_ksl_key *rhs) { + return !compar(lhs, rhs) && !compar(rhs, lhs); +} + +int ngtcp2_ksl_remove_hint(ngtcp2_ksl *ksl, ngtcp2_ksl_it *it, + const ngtcp2_ksl_it *hint, + const ngtcp2_ksl_key *key) { + ngtcp2_ksl_blk *blk = hint->blk; + + assert(ksl->head); + + if (blk->n <= NGTCP2_KSL_MIN_NBLK) { + return ngtcp2_ksl_remove(ksl, it, key); + } + + ksl_remove_node(ksl, blk, hint->i); + + --ksl->n; + + if (it) { + if (hint->i == blk->n && blk->next) { + ngtcp2_ksl_it_init(it, ksl, blk->next, 0); + } else { + ngtcp2_ksl_it_init(it, ksl, blk, hint->i); + } + } + + return 0; +} + +int ngtcp2_ksl_remove(ngtcp2_ksl *ksl, ngtcp2_ksl_it *it, + const ngtcp2_ksl_key *key) { + ngtcp2_ksl_blk *blk = ksl->head; + ngtcp2_ksl_node *node; + size_t i; + + if (!ksl->head) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + if (!blk->leaf && blk->n == 2 && + ngtcp2_ksl_nth_node(ksl, blk, 0)->blk->n == NGTCP2_KSL_MIN_NBLK && + ngtcp2_ksl_nth_node(ksl, blk, 1)->blk->n == NGTCP2_KSL_MIN_NBLK) { + blk = ksl_merge_node(ksl, ksl->head, 0); + } + + for (;;) { + i = ksl_bsearch(ksl, blk, key, ksl->compar); + + if (i == blk->n) { + if (it) { + *it = ngtcp2_ksl_end(ksl); + } + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + if (blk->leaf) { + if (ksl->compar(key, ngtcp2_ksl_nth_node(ksl, blk, i)->key)) { + if (it) { + *it = ngtcp2_ksl_end(ksl); + } + return NGTCP2_ERR_INVALID_ARGUMENT; + } + ksl_remove_node(ksl, blk, i); + --ksl->n; + if (it) { + if (blk->n == i && blk->next) { + ngtcp2_ksl_it_init(it, ksl, blk->next, 0); + } else { + ngtcp2_ksl_it_init(it, ksl, blk, i); + } + } + return 0; + } + + node = ngtcp2_ksl_nth_node(ksl, blk, i); + + if (node->blk->n > NGTCP2_KSL_MIN_NBLK) { + blk = node->blk; + continue; + } + + assert(node->blk->n == NGTCP2_KSL_MIN_NBLK); + + if (i + 1 < blk->n && + ngtcp2_ksl_nth_node(ksl, blk, i + 1)->blk->n > NGTCP2_KSL_MIN_NBLK) { + ksl_shift_left(ksl, blk, i + 1); + blk = node->blk; + continue; + } + + if (i > 0 && + ngtcp2_ksl_nth_node(ksl, blk, i - 1)->blk->n > NGTCP2_KSL_MIN_NBLK) { + ksl_shift_right(ksl, blk, i - 1); + blk = node->blk; + continue; + } + + if (i + 1 < blk->n) { + blk = ksl_merge_node(ksl, blk, i); + continue; + } + + assert(i > 0); + + blk = ksl_merge_node(ksl, blk, i - 1); + } +} + +ngtcp2_ksl_it ngtcp2_ksl_lower_bound(ngtcp2_ksl *ksl, + const ngtcp2_ksl_key *key) { + ngtcp2_ksl_blk *blk = ksl->head; + ngtcp2_ksl_it it; + size_t i; + + if (!blk) { + ngtcp2_ksl_it_init(&it, ksl, &null_blk, 0); + return it; + } + + for (;;) { + i = ksl_bsearch(ksl, blk, key, ksl->compar); + + if (blk->leaf) { + if (i == blk->n && blk->next) { + blk = blk->next; + i = 0; + } + ngtcp2_ksl_it_init(&it, ksl, blk, i); + return it; + } + + if (i == blk->n) { + /* This happens if descendant has smaller key. Fast forward to + find last node in this subtree. */ + for (; !blk->leaf; blk = ngtcp2_ksl_nth_node(ksl, blk, blk->n - 1)->blk) + ; + if (blk->next) { + blk = blk->next; + i = 0; + } else { + i = blk->n; + } + ngtcp2_ksl_it_init(&it, ksl, blk, i); + return it; + } + blk = ngtcp2_ksl_nth_node(ksl, blk, i)->blk; + } +} + +ngtcp2_ksl_it ngtcp2_ksl_lower_bound_compar(ngtcp2_ksl *ksl, + const ngtcp2_ksl_key *key, + ngtcp2_ksl_compar compar) { + ngtcp2_ksl_blk *blk = ksl->head; + ngtcp2_ksl_it it; + size_t i; + + if (!blk) { + ngtcp2_ksl_it_init(&it, ksl, &null_blk, 0); + return it; + } + + for (;;) { + i = ksl_bsearch(ksl, blk, key, compar); + + if (blk->leaf) { + if (i == blk->n && blk->next) { + blk = blk->next; + i = 0; + } + ngtcp2_ksl_it_init(&it, ksl, blk, i); + return it; + } + + if (i == blk->n) { + /* This happens if descendant has smaller key. Fast forward to + find last node in this subtree. */ + for (; !blk->leaf; blk = ngtcp2_ksl_nth_node(ksl, blk, blk->n - 1)->blk) + ; + if (blk->next) { + blk = blk->next; + i = 0; + } else { + i = blk->n; + } + ngtcp2_ksl_it_init(&it, ksl, blk, i); + return it; + } + blk = ngtcp2_ksl_nth_node(ksl, blk, i)->blk; + } +} + +void ngtcp2_ksl_update_key(ngtcp2_ksl *ksl, const ngtcp2_ksl_key *old_key, + const ngtcp2_ksl_key *new_key) { + ngtcp2_ksl_blk *blk = ksl->head; + ngtcp2_ksl_node *node; + size_t i; + + assert(ksl->head); + + for (;;) { + i = ksl_bsearch(ksl, blk, old_key, ksl->compar); + + assert(i < blk->n); + node = ngtcp2_ksl_nth_node(ksl, blk, i); + + if (blk->leaf) { + assert(key_equal(ksl->compar, (ngtcp2_ksl_key *)node->key, old_key)); + ksl_node_set_key(ksl, node, new_key); + return; + } + + if (key_equal(ksl->compar, (ngtcp2_ksl_key *)node->key, old_key) || + ksl->compar((ngtcp2_ksl_key *)node->key, new_key)) { + ksl_node_set_key(ksl, node, new_key); + } + + blk = node->blk; + } +} + +static void ksl_print(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, size_t level) { + size_t i; + ngtcp2_ksl_node *node; + + fprintf(stderr, "LV=%zu n=%u\n", level, blk->n); + + if (blk->leaf) { + for (i = 0; i < blk->n; ++i) { + node = ngtcp2_ksl_nth_node(ksl, blk, i); + fprintf(stderr, " %" PRId64, *(int64_t *)(void *)node->key); + } + fprintf(stderr, "\n"); + return; + } + + for (i = 0; i < blk->n; ++i) { + ksl_print(ksl, ngtcp2_ksl_nth_node(ksl, blk, i)->blk, level + 1); + } +} + +size_t ngtcp2_ksl_len(ngtcp2_ksl *ksl) { return ksl->n; } + +void ngtcp2_ksl_clear(ngtcp2_ksl *ksl) { + if (!ksl->head) { + return; + } + +#ifdef NOMEMPOOL + ksl_free_blk(ksl, ksl->head); +#endif /* NOMEMPOOL */ + + ksl->front = ksl->back = ksl->head = NULL; + ksl->n = 0; + + ngtcp2_objalloc_clear(&ksl->blkalloc); +} + +void ngtcp2_ksl_print(ngtcp2_ksl *ksl) { + if (!ksl->head) { + return; + } + + ksl_print(ksl, ksl->head, 0); +} + +ngtcp2_ksl_it ngtcp2_ksl_begin(const ngtcp2_ksl *ksl) { + ngtcp2_ksl_it it; + + if (ksl->head) { + ngtcp2_ksl_it_init(&it, ksl, ksl->front, 0); + } else { + ngtcp2_ksl_it_init(&it, ksl, &null_blk, 0); + } + + return it; +} + +ngtcp2_ksl_it ngtcp2_ksl_end(const ngtcp2_ksl *ksl) { + ngtcp2_ksl_it it; + + if (ksl->head) { + ngtcp2_ksl_it_init(&it, ksl, ksl->back, ksl->back->n); + } else { + ngtcp2_ksl_it_init(&it, ksl, &null_blk, 0); + } + + return it; +} + +void ngtcp2_ksl_it_init(ngtcp2_ksl_it *it, const ngtcp2_ksl *ksl, + ngtcp2_ksl_blk *blk, size_t i) { + it->ksl = ksl; + it->blk = blk; + it->i = i; +} + +void ngtcp2_ksl_it_prev(ngtcp2_ksl_it *it) { + assert(!ngtcp2_ksl_it_begin(it)); + + if (it->i == 0) { + it->blk = it->blk->prev; + it->i = it->blk->n - 1; + } else { + --it->i; + } +} + +int ngtcp2_ksl_it_begin(const ngtcp2_ksl_it *it) { + return it->i == 0 && it->blk->prev == NULL; +} + +int ngtcp2_ksl_range_compar(const ngtcp2_ksl_key *lhs, + const ngtcp2_ksl_key *rhs) { + const ngtcp2_range *a = lhs, *b = rhs; + return a->begin < b->begin; +} + +int ngtcp2_ksl_range_exclusive_compar(const ngtcp2_ksl_key *lhs, + const ngtcp2_ksl_key *rhs) { + const ngtcp2_range *a = lhs, *b = rhs; + return a->begin < b->begin && + !(ngtcp2_max(a->begin, b->begin) < ngtcp2_min(a->end, b->end)); +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ksl.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ksl.h new file mode 100644 index 0000000..312a151 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ksl.h @@ -0,0 +1,345 @@ +/* + * ngtcp2 + * + * Copyright (c) 2018 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_KSL_H +#define NGTCP2_KSL_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <stdlib.h> + +#include <ngtcp2/ngtcp2.h> + +#include "ngtcp2_objalloc.h" + +/* + * Skip List using single key instead of range. + */ + +#define NGTCP2_KSL_DEGR 16 +/* NGTCP2_KSL_MAX_NBLK is the maximum number of nodes which a single + block can contain. */ +#define NGTCP2_KSL_MAX_NBLK (2 * NGTCP2_KSL_DEGR - 1) +/* NGTCP2_KSL_MIN_NBLK is the minimum number of nodes which a single + block other than root must contains. */ +#define NGTCP2_KSL_MIN_NBLK (NGTCP2_KSL_DEGR - 1) + +/* + * ngtcp2_ksl_key represents key in ngtcp2_ksl. + */ +typedef void ngtcp2_ksl_key; + +typedef struct ngtcp2_ksl_node ngtcp2_ksl_node; + +typedef struct ngtcp2_ksl_blk ngtcp2_ksl_blk; + +/* + * ngtcp2_ksl_node is a node which contains either ngtcp2_ksl_blk or + * opaque data. If a node is an internal node, it contains + * ngtcp2_ksl_blk. Otherwise, it has data. The key is stored at the + * location starting at key. + */ +struct ngtcp2_ksl_node { + union { + ngtcp2_ksl_blk *blk; + void *data; + }; + union { + uint64_t align; + /* key is a buffer to include key associated to this node. + Because the length of key is unknown until ngtcp2_ksl_init is + called, the actual buffer will be allocated after this + field. */ + uint8_t key[1]; + }; +}; + +/* + * ngtcp2_ksl_blk contains ngtcp2_ksl_node objects. + */ +struct ngtcp2_ksl_blk { + union { + struct { + /* next points to the next block if leaf field is nonzero. */ + ngtcp2_ksl_blk *next; + /* prev points to the previous block if leaf field is nonzero. */ + ngtcp2_ksl_blk *prev; + /* n is the number of nodes this object contains in nodes. */ + uint32_t n; + /* leaf is nonzero if this block contains leaf nodes. */ + uint32_t leaf; + union { + uint64_t align; + /* nodes is a buffer to contain NGTCP2_KSL_MAX_NBLK + ngtcp2_ksl_node objects. Because ngtcp2_ksl_node object is + allocated along with the additional variable length key + storage, the size of buffer is unknown until ngtcp2_ksl_init is + called. */ + uint8_t nodes[1]; + }; + }; + + ngtcp2_opl_entry oplent; + }; +}; + +ngtcp2_objalloc_def(ksl_blk, ngtcp2_ksl_blk, oplent); + +/* + * ngtcp2_ksl_compar is a function type which returns nonzero if key + * |lhs| should be placed before |rhs|. It returns 0 otherwise. + */ +typedef int (*ngtcp2_ksl_compar)(const ngtcp2_ksl_key *lhs, + const ngtcp2_ksl_key *rhs); + +typedef struct ngtcp2_ksl ngtcp2_ksl; + +typedef struct ngtcp2_ksl_it ngtcp2_ksl_it; + +/* + * ngtcp2_ksl_it is a forward iterator to iterate nodes. + */ +struct ngtcp2_ksl_it { + const ngtcp2_ksl *ksl; + ngtcp2_ksl_blk *blk; + size_t i; +}; + +/* + * ngtcp2_ksl is a deterministic paged skip list. + */ +struct ngtcp2_ksl { + ngtcp2_objalloc blkalloc; + /* head points to the root block. */ + ngtcp2_ksl_blk *head; + /* front points to the first leaf block. */ + ngtcp2_ksl_blk *front; + /* back points to the last leaf block. */ + ngtcp2_ksl_blk *back; + ngtcp2_ksl_compar compar; + size_t n; + /* keylen is the size of key */ + size_t keylen; + /* nodelen is the actual size of ngtcp2_ksl_node including key + storage. */ + size_t nodelen; +}; + +/* + * ngtcp2_ksl_init initializes |ksl|. |compar| specifies compare + * function. |keylen| is the length of key. + */ +void ngtcp2_ksl_init(ngtcp2_ksl *ksl, ngtcp2_ksl_compar compar, size_t keylen, + const ngtcp2_mem *mem); + +/* + * ngtcp2_ksl_free frees resources allocated for |ksl|. If |ksl| is + * NULL, this function does nothing. It does not free the memory + * region pointed by |ksl| itself. + */ +void ngtcp2_ksl_free(ngtcp2_ksl *ksl); + +/* + * ngtcp2_ksl_insert inserts |key| with its associated |data|. On + * successful insertion, the iterator points to the inserted node is + * stored in |*it|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + * NGTCP2_ERR_INVALID_ARGUMENT + * |key| already exists. + */ +int ngtcp2_ksl_insert(ngtcp2_ksl *ksl, ngtcp2_ksl_it *it, + const ngtcp2_ksl_key *key, void *data); + +/* + * ngtcp2_ksl_remove removes the |key| from |ksl|. + * + * This function assigns the iterator to |*it|, which points to the + * node which is located at the right next of the removed node if |it| + * is not NULL. If |key| is not found, no deletion takes place and + * the return value of ngtcp2_ksl_end(ksl) is assigned to |*it|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_INVALID_ARGUMENT + * |key| does not exist. + */ +int ngtcp2_ksl_remove(ngtcp2_ksl *ksl, ngtcp2_ksl_it *it, + const ngtcp2_ksl_key *key); + +/* + * ngtcp2_ksl_remove_hint removes the |key| from |ksl|. |hint| must + * point to the same node denoted by |key|. |hint| is used to remove + * a node efficiently in some cases. Other than that, it behaves + * exactly like ngtcp2_ksl_remove. |it| and |hint| can point to the + * same object. + */ +int ngtcp2_ksl_remove_hint(ngtcp2_ksl *ksl, ngtcp2_ksl_it *it, + const ngtcp2_ksl_it *hint, + const ngtcp2_ksl_key *key); + +/* + * ngtcp2_ksl_lower_bound returns the iterator which points to the + * first node which has the key which is equal to |key| or the last + * node which satisfies !compar(&node->key, key). If there is no such + * node, it returns the iterator which satisfies ngtcp2_ksl_it_end(it) + * != 0. + */ +ngtcp2_ksl_it ngtcp2_ksl_lower_bound(ngtcp2_ksl *ksl, + const ngtcp2_ksl_key *key); + +/* + * ngtcp2_ksl_lower_bound_compar works like ngtcp2_ksl_lower_bound, + * but it takes custom function |compar| to do lower bound search. + */ +ngtcp2_ksl_it ngtcp2_ksl_lower_bound_compar(ngtcp2_ksl *ksl, + const ngtcp2_ksl_key *key, + ngtcp2_ksl_compar compar); + +/* + * ngtcp2_ksl_update_key replaces the key of nodes which has |old_key| + * with |new_key|. |new_key| must be strictly greater than the + * previous node and strictly smaller than the next node. + */ +void ngtcp2_ksl_update_key(ngtcp2_ksl *ksl, const ngtcp2_ksl_key *old_key, + const ngtcp2_ksl_key *new_key); + +/* + * ngtcp2_ksl_begin returns the iterator which points to the first + * node. If there is no node in |ksl|, it returns the iterator which + * satisfies ngtcp2_ksl_it_end(it) != 0. + */ +ngtcp2_ksl_it ngtcp2_ksl_begin(const ngtcp2_ksl *ksl); + +/* + * ngtcp2_ksl_end returns the iterator which points to the node + * following the last node. The returned object satisfies + * ngtcp2_ksl_it_end(). If there is no node in |ksl|, it returns the + * iterator which satisfies ngtcp2_ksl_it_begin(it) != 0. + */ +ngtcp2_ksl_it ngtcp2_ksl_end(const ngtcp2_ksl *ksl); + +/* + * ngtcp2_ksl_len returns the number of elements stored in |ksl|. + */ +size_t ngtcp2_ksl_len(ngtcp2_ksl *ksl); + +/* + * ngtcp2_ksl_clear removes all elements stored in |ksl|. + */ +void ngtcp2_ksl_clear(ngtcp2_ksl *ksl); + +/* + * ngtcp2_ksl_nth_node returns the |n|th node under |blk|. + */ +#define ngtcp2_ksl_nth_node(KSL, BLK, N) \ + ((ngtcp2_ksl_node *)(void *)((BLK)->nodes + (KSL)->nodelen * (N))) + +/* + * ngtcp2_ksl_print prints its internal state in stderr. It assumes + * that the key is of type int64_t. This function should be used for + * the debugging purpose only. + */ +void ngtcp2_ksl_print(ngtcp2_ksl *ksl); + +/* + * ngtcp2_ksl_it_init initializes |it|. + */ +void ngtcp2_ksl_it_init(ngtcp2_ksl_it *it, const ngtcp2_ksl *ksl, + ngtcp2_ksl_blk *blk, size_t i); + +/* + * ngtcp2_ksl_it_get returns the data associated to the node which + * |it| points to. It is undefined to call this function when + * ngtcp2_ksl_it_end(it) returns nonzero. + */ +#define ngtcp2_ksl_it_get(IT) \ + ngtcp2_ksl_nth_node((IT)->ksl, (IT)->blk, (IT)->i)->data + +/* + * ngtcp2_ksl_it_next advances the iterator by one. It is undefined + * if this function is called when ngtcp2_ksl_it_end(it) returns + * nonzero. + */ +#define ngtcp2_ksl_it_next(IT) \ + (++(IT)->i == (IT)->blk->n && (IT)->blk->next \ + ? ((IT)->blk = (IT)->blk->next, (IT)->i = 0) \ + : 0) + +/* + * ngtcp2_ksl_it_prev moves backward the iterator by one. It is + * undefined if this function is called when ngtcp2_ksl_it_begin(it) + * returns nonzero. + */ +void ngtcp2_ksl_it_prev(ngtcp2_ksl_it *it); + +/* + * ngtcp2_ksl_it_end returns nonzero if |it| points to the beyond the + * last node. + */ +#define ngtcp2_ksl_it_end(IT) \ + ((IT)->blk->n == (IT)->i && (IT)->blk->next == NULL) + +/* + * ngtcp2_ksl_it_begin returns nonzero if |it| points to the first + * node. |it| might satisfy both ngtcp2_ksl_it_begin(&it) and + * ngtcp2_ksl_it_end(&it) if the skip list has no node. + */ +int ngtcp2_ksl_it_begin(const ngtcp2_ksl_it *it); + +/* + * ngtcp2_ksl_key returns the key of the node which |it| points to. + * It is undefined to call this function when ngtcp2_ksl_it_end(it) + * returns nonzero. + */ +#define ngtcp2_ksl_it_key(IT) \ + ((ngtcp2_ksl_key *)ngtcp2_ksl_nth_node((IT)->ksl, (IT)->blk, (IT)->i)->key) + +/* + * ngtcp2_ksl_range_compar is an implementation of ngtcp2_ksl_compar. + * lhs->ptr and rhs->ptr must point to ngtcp2_range object and the + * function returns nonzero if (const ngtcp2_range *)(lhs->ptr)->begin + * < (const ngtcp2_range *)(rhs->ptr)->begin. + */ +int ngtcp2_ksl_range_compar(const ngtcp2_ksl_key *lhs, + const ngtcp2_ksl_key *rhs); + +/* + * ngtcp2_ksl_range_exclusive_compar is an implementation of + * ngtcp2_ksl_compar. lhs->ptr and rhs->ptr must point to + * ngtcp2_range object and the function returns nonzero if (const + * ngtcp2_range *)(lhs->ptr)->begin < (const ngtcp2_range + * *)(rhs->ptr)->begin and the 2 ranges do not intersect. + */ +int ngtcp2_ksl_range_exclusive_compar(const ngtcp2_ksl_key *lhs, + const ngtcp2_ksl_key *rhs); + +#endif /* NGTCP2_KSL_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_log.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_log.c new file mode 100644 index 0000000..790adeb --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_log.c @@ -0,0 +1,822 @@ +/* + * ngtcp2 + * + * Copyright (c) 2018 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_log.h" + +#include <stdio.h> +#ifdef HAVE_UNISTD_H +# include <unistd.h> +#endif +#include <assert.h> +#include <string.h> + +#include "ngtcp2_str.h" +#include "ngtcp2_vec.h" +#include "ngtcp2_macro.h" +#include "ngtcp2_conv.h" +#include "ngtcp2_unreachable.h" +#include "ngtcp2_net.h" + +void ngtcp2_log_init(ngtcp2_log *log, const ngtcp2_cid *scid, + ngtcp2_printf log_printf, ngtcp2_tstamp ts, + void *user_data) { + if (scid) { + ngtcp2_encode_hex(log->scid, scid->data, scid->datalen); + } else { + log->scid[0] = '\0'; + } + log->log_printf = log_printf; + log->ts = log->last_ts = ts; + log->user_data = user_data; +} + +/* + * # Log header + * + * <LEVEL><TIMESTAMP> <SCID> <EVENT> + * + * <LEVEL>: + * Log level. I=Info, W=Warning, E=Error + * + * <TIMESTAMP>: + * Timestamp relative to ngtcp2_log.ts field in milliseconds + * resolution. + * + * <SCID>: + * Source Connection ID in hex string. + * + * <EVENT>: + * Event. pkt=packet, frm=frame, rcv=recovery, cry=crypto, + * con=connection(catch all) + * + * # Frame event + * + * <DIR> <PKN> <PKTNAME> <FRAMENAME>(<FRAMETYPE>) + * + * <DIR>: + * Flow direction. tx=transmission, rx=reception + * + * <PKN>: + * Packet number. + * + * <PKTNAME>: + * Packet name. (e.g., Initial, Handshake, 1RTT) + * + * <FRAMENAME>: + * Frame name. (e.g., STREAM, ACK, PING) + * + * <FRAMETYPE>: + * Frame type in hex string. + */ + +#define NGTCP2_LOG_BUFLEN 4096 + +/* TODO Split second and remaining fraction with comma */ +#define NGTCP2_LOG_HD "I%08" PRIu64 " 0x%s %s" +#define NGTCP2_LOG_PKT NGTCP2_LOG_HD " %s %" PRId64 " %s" +#define NGTCP2_LOG_TP NGTCP2_LOG_HD " remote transport_parameters" + +#define NGTCP2_LOG_FRM_HD_FIELDS(DIR) \ + timestamp_cast(log->last_ts - log->ts), (const char *)log->scid, "frm", \ + (DIR), hd->pkt_num, strpkttype(hd) + +#define NGTCP2_LOG_PKT_HD_FIELDS(DIR) \ + timestamp_cast(log->last_ts - log->ts), (const char *)log->scid, "pkt", \ + (DIR), hd->pkt_num, strpkttype(hd) + +#define NGTCP2_LOG_TP_HD_FIELDS \ + timestamp_cast(log->last_ts - log->ts), (const char *)log->scid, "cry" + +static const char *strerrorcode(uint64_t error_code) { + switch (error_code) { + case NGTCP2_NO_ERROR: + return "NO_ERROR"; + case NGTCP2_INTERNAL_ERROR: + return "INTERNAL_ERROR"; + case NGTCP2_CONNECTION_REFUSED: + return "CONNECTION_REFUSED"; + case NGTCP2_FLOW_CONTROL_ERROR: + return "FLOW_CONTROL_ERROR"; + case NGTCP2_STREAM_LIMIT_ERROR: + return "STREAM_LIMIT_ERROR"; + case NGTCP2_STREAM_STATE_ERROR: + return "STREAM_STATE_ERROR"; + case NGTCP2_FINAL_SIZE_ERROR: + return "FINAL_SIZE_ERROR"; + case NGTCP2_FRAME_ENCODING_ERROR: + return "FRAME_ENCODING_ERROR"; + case NGTCP2_TRANSPORT_PARAMETER_ERROR: + return "TRANSPORT_PARAMETER_ERROR"; + case NGTCP2_CONNECTION_ID_LIMIT_ERROR: + return "CONNECTION_ID_LIMIT_ERROR"; + case NGTCP2_PROTOCOL_VIOLATION: + return "PROTOCOL_VIOLATION"; + case NGTCP2_INVALID_TOKEN: + return "INVALID_TOKEN"; + case NGTCP2_APPLICATION_ERROR: + return "APPLICATION_ERROR"; + case NGTCP2_CRYPTO_BUFFER_EXCEEDED: + return "CRYPTO_BUFFER_EXCEEDED"; + case NGTCP2_KEY_UPDATE_ERROR: + return "KEY_UPDATE_ERROR"; + case NGTCP2_VERSION_NEGOTIATION_ERROR: + return "VERSION_NEGOTIATION_ERROR"; + default: + if (0x100u <= error_code && error_code <= 0x1ffu) { + return "CRYPTO_ERROR"; + } + return "(unknown)"; + } +} + +static const char *strapperrorcode(uint64_t app_error_code) { + (void)app_error_code; + return "(unknown)"; +} + +static const char *strpkttype_long(uint8_t type) { + switch (type) { + case NGTCP2_PKT_INITIAL: + return "Initial"; + case NGTCP2_PKT_RETRY: + return "Retry"; + case NGTCP2_PKT_HANDSHAKE: + return "Handshake"; + case NGTCP2_PKT_0RTT: + return "0RTT"; + default: + return "(unknown)"; + } +} + +static const char *strpkttype(const ngtcp2_pkt_hd *hd) { + if (hd->flags & NGTCP2_PKT_FLAG_LONG_FORM) { + return strpkttype_long(hd->type); + } + + switch (hd->type) { + case NGTCP2_PKT_VERSION_NEGOTIATION: + return "VN"; + case NGTCP2_PKT_STATELESS_RESET: + return "SR"; + case NGTCP2_PKT_1RTT: + return "1RTT"; + default: + return "(unknown)"; + } +} + +static const char *strpkttype_type_flags(uint8_t type, uint8_t flags) { + ngtcp2_pkt_hd hd = {0}; + + hd.type = type; + hd.flags = flags; + + return strpkttype(&hd); +} + +static const char *strevent(ngtcp2_log_event ev) { + switch (ev) { + case NGTCP2_LOG_EVENT_CON: + return "con"; + case NGTCP2_LOG_EVENT_PKT: + return "pkt"; + case NGTCP2_LOG_EVENT_FRM: + return "frm"; + case NGTCP2_LOG_EVENT_RCV: + return "rcv"; + case NGTCP2_LOG_EVENT_CRY: + return "cry"; + case NGTCP2_LOG_EVENT_PTV: + return "ptv"; + case NGTCP2_LOG_EVENT_NONE: + default: + return "non"; + } +} + +static uint64_t timestamp_cast(uint64_t ns) { return ns / NGTCP2_MILLISECONDS; } + +static void log_fr_stream(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, + const ngtcp2_stream *fr, const char *dir) { + log->log_printf( + log->user_data, + (NGTCP2_LOG_PKT " STREAM(0x%02x) id=0x%" PRIx64 " fin=%d offset=%" PRIu64 + " len=%" PRIu64 " uni=%d"), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type | fr->flags, fr->stream_id, + fr->fin, fr->offset, ngtcp2_vec_len(fr->data, fr->datacnt), + (fr->stream_id & 0x2) != 0); +} + +static void log_fr_ack(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, + const ngtcp2_ack *fr, const char *dir) { + int64_t largest_ack, min_ack; + size_t i; + + log->log_printf(log->user_data, + (NGTCP2_LOG_PKT " ACK(0x%02x) largest_ack=%" PRId64 + " ack_delay=%" PRIu64 "(%" PRIu64 + ") ack_range_count=%zu"), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->largest_ack, + fr->ack_delay_unscaled / NGTCP2_MILLISECONDS, fr->ack_delay, + fr->rangecnt); + + largest_ack = fr->largest_ack; + min_ack = fr->largest_ack - (int64_t)fr->first_ack_range; + + log->log_printf(log->user_data, + (NGTCP2_LOG_PKT " ACK(0x%02x) range=[%" PRId64 "..%" PRId64 + "] len=%" PRIu64), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, largest_ack, min_ack, + fr->first_ack_range); + + for (i = 0; i < fr->rangecnt; ++i) { + const ngtcp2_ack_range *range = &fr->ranges[i]; + largest_ack = min_ack - (int64_t)range->gap - 2; + min_ack = largest_ack - (int64_t)range->len; + log->log_printf(log->user_data, + (NGTCP2_LOG_PKT " ACK(0x%02x) range=[%" PRId64 "..%" PRId64 + "] gap=%" PRIu64 " len=%" PRIu64), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, largest_ack, + min_ack, range->gap, range->len); + } + + if (fr->type == NGTCP2_FRAME_ACK_ECN) { + log->log_printf(log->user_data, + (NGTCP2_LOG_PKT " ACK(0x%02x) ect0=%" PRIu64 + " ect1=%" PRIu64 " ce=%" PRIu64), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->ecn.ect0, + fr->ecn.ect1, fr->ecn.ce); + } +} + +static void log_fr_padding(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, + const ngtcp2_padding *fr, const char *dir) { + log->log_printf(log->user_data, (NGTCP2_LOG_PKT " PADDING(0x%02x) len=%zu"), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->len); +} + +static void log_fr_reset_stream(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, + const ngtcp2_reset_stream *fr, + const char *dir) { + log->log_printf( + log->user_data, + (NGTCP2_LOG_PKT " RESET_STREAM(0x%02x) id=0x%" PRIx64 + " app_error_code=%s(0x%" PRIx64 ") final_size=%" PRIu64), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->stream_id, + strapperrorcode(fr->app_error_code), fr->app_error_code, fr->final_size); +} + +static void log_fr_connection_close(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, + const ngtcp2_connection_close *fr, + const char *dir) { + char reason[256]; + size_t reasonlen = ngtcp2_min(sizeof(reason) - 1, fr->reasonlen); + + log->log_printf(log->user_data, + (NGTCP2_LOG_PKT + " CONNECTION_CLOSE(0x%02x) error_code=%s(0x%" PRIx64 ") " + "frame_type=%" PRIx64 " reason_len=%zu reason=[%s]"), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, + fr->type == NGTCP2_FRAME_CONNECTION_CLOSE + ? strerrorcode(fr->error_code) + : strapperrorcode(fr->error_code), + fr->error_code, fr->frame_type, fr->reasonlen, + ngtcp2_encode_printable_ascii(reason, fr->reason, reasonlen)); +} + +static void log_fr_max_data(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, + const ngtcp2_max_data *fr, const char *dir) { + log->log_printf(log->user_data, + (NGTCP2_LOG_PKT " MAX_DATA(0x%02x) max_data=%" PRIu64), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->max_data); +} + +static void log_fr_max_stream_data(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, + const ngtcp2_max_stream_data *fr, + const char *dir) { + log->log_printf(log->user_data, + (NGTCP2_LOG_PKT " MAX_STREAM_DATA(0x%02x) id=0x%" PRIx64 + " max_stream_data=%" PRIu64), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->stream_id, + fr->max_stream_data); +} + +static void log_fr_max_streams(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, + const ngtcp2_max_streams *fr, const char *dir) { + log->log_printf(log->user_data, + (NGTCP2_LOG_PKT " MAX_STREAMS(0x%02x) max_streams=%" PRIu64), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->max_streams); +} + +static void log_fr_ping(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, + const ngtcp2_ping *fr, const char *dir) { + log->log_printf(log->user_data, (NGTCP2_LOG_PKT " PING(0x%02x)"), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type); +} + +static void log_fr_data_blocked(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, + const ngtcp2_data_blocked *fr, + const char *dir) { + log->log_printf(log->user_data, + (NGTCP2_LOG_PKT " DATA_BLOCKED(0x%02x) offset=%" PRIu64), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->offset); +} + +static void log_fr_stream_data_blocked(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, + const ngtcp2_stream_data_blocked *fr, + const char *dir) { + log->log_printf(log->user_data, + (NGTCP2_LOG_PKT " STREAM_DATA_BLOCKED(0x%02x) id=0x%" PRIx64 + " offset=%" PRIu64), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->stream_id, + fr->offset); +} + +static void log_fr_streams_blocked(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, + const ngtcp2_streams_blocked *fr, + const char *dir) { + log->log_printf( + log->user_data, + (NGTCP2_LOG_PKT " STREAMS_BLOCKED(0x%02x) max_streams=%" PRIu64), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->max_streams); +} + +static void log_fr_new_connection_id(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, + const ngtcp2_new_connection_id *fr, + const char *dir) { + uint8_t buf[sizeof(fr->stateless_reset_token) * 2 + 1]; + uint8_t cid[sizeof(fr->cid.data) * 2 + 1]; + + log->log_printf( + log->user_data, + (NGTCP2_LOG_PKT " NEW_CONNECTION_ID(0x%02x) seq=%" PRIu64 + " cid=0x%s retire_prior_to=%" PRIu64 + " stateless_reset_token=0x%s"), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->seq, + (const char *)ngtcp2_encode_hex(cid, fr->cid.data, fr->cid.datalen), + fr->retire_prior_to, + (const char *)ngtcp2_encode_hex(buf, fr->stateless_reset_token, + sizeof(fr->stateless_reset_token))); +} + +static void log_fr_stop_sending(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, + const ngtcp2_stop_sending *fr, + const char *dir) { + log->log_printf(log->user_data, + (NGTCP2_LOG_PKT " STOP_SENDING(0x%02x) id=0x%" PRIx64 + " app_error_code=%s(0x%" PRIx64 ")"), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->stream_id, + strapperrorcode(fr->app_error_code), fr->app_error_code); +} + +static void log_fr_path_challenge(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, + const ngtcp2_path_challenge *fr, + const char *dir) { + uint8_t buf[sizeof(fr->data) * 2 + 1]; + + log->log_printf( + log->user_data, (NGTCP2_LOG_PKT " PATH_CHALLENGE(0x%02x) data=0x%s"), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, + (const char *)ngtcp2_encode_hex(buf, fr->data, sizeof(fr->data))); +} + +static void log_fr_path_response(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, + const ngtcp2_path_response *fr, + const char *dir) { + uint8_t buf[sizeof(fr->data) * 2 + 1]; + + log->log_printf( + log->user_data, (NGTCP2_LOG_PKT " PATH_RESPONSE(0x%02x) data=0x%s"), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, + (const char *)ngtcp2_encode_hex(buf, fr->data, sizeof(fr->data))); +} + +static void log_fr_crypto(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, + const ngtcp2_crypto *fr, const char *dir) { + log->log_printf( + log->user_data, + (NGTCP2_LOG_PKT " CRYPTO(0x%02x) offset=%" PRIu64 " len=%" PRIu64), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->offset, + ngtcp2_vec_len(fr->data, fr->datacnt)); +} + +static void log_fr_new_token(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, + const ngtcp2_new_token *fr, const char *dir) { + /* Show at most first 64 bytes of token. If token is longer than 64 + bytes, log first 64 bytes and then append "*" */ + uint8_t buf[128 + 1 + 1]; + uint8_t *p; + + if (fr->tokenlen > 64) { + p = ngtcp2_encode_hex(buf, fr->token, 64); + p[128] = '*'; + p[129] = '\0'; + } else { + p = ngtcp2_encode_hex(buf, fr->token, fr->tokenlen); + } + log->log_printf( + log->user_data, (NGTCP2_LOG_PKT " NEW_TOKEN(0x%02x) token=0x%s len=%zu"), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, (const char *)p, fr->tokenlen); +} + +static void log_fr_retire_connection_id(ngtcp2_log *log, + const ngtcp2_pkt_hd *hd, + const ngtcp2_retire_connection_id *fr, + const char *dir) { + log->log_printf(log->user_data, + (NGTCP2_LOG_PKT " RETIRE_CONNECTION_ID(0x%02x) seq=%" PRIu64), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->seq); +} + +static void log_fr_handshake_done(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, + const ngtcp2_handshake_done *fr, + const char *dir) { + log->log_printf(log->user_data, (NGTCP2_LOG_PKT " HANDSHAKE_DONE(0x%02x)"), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type); +} + +static void log_fr_datagram(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, + const ngtcp2_datagram *fr, const char *dir) { + log->log_printf(log->user_data, + (NGTCP2_LOG_PKT " DATAGRAM(0x%02x) len=%" PRIu64), + NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, + ngtcp2_vec_len(fr->data, fr->datacnt)); +} + +static void log_fr(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, + const ngtcp2_frame *fr, const char *dir) { + switch (fr->type) { + case NGTCP2_FRAME_STREAM: + log_fr_stream(log, hd, &fr->stream, dir); + break; + case NGTCP2_FRAME_ACK: + case NGTCP2_FRAME_ACK_ECN: + log_fr_ack(log, hd, &fr->ack, dir); + break; + case NGTCP2_FRAME_PADDING: + log_fr_padding(log, hd, &fr->padding, dir); + break; + case NGTCP2_FRAME_RESET_STREAM: + log_fr_reset_stream(log, hd, &fr->reset_stream, dir); + break; + case NGTCP2_FRAME_CONNECTION_CLOSE: + case NGTCP2_FRAME_CONNECTION_CLOSE_APP: + log_fr_connection_close(log, hd, &fr->connection_close, dir); + break; + case NGTCP2_FRAME_MAX_DATA: + log_fr_max_data(log, hd, &fr->max_data, dir); + break; + case NGTCP2_FRAME_MAX_STREAM_DATA: + log_fr_max_stream_data(log, hd, &fr->max_stream_data, dir); + break; + case NGTCP2_FRAME_MAX_STREAMS_BIDI: + case NGTCP2_FRAME_MAX_STREAMS_UNI: + log_fr_max_streams(log, hd, &fr->max_streams, dir); + break; + case NGTCP2_FRAME_PING: + log_fr_ping(log, hd, &fr->ping, dir); + break; + case NGTCP2_FRAME_DATA_BLOCKED: + log_fr_data_blocked(log, hd, &fr->data_blocked, dir); + break; + case NGTCP2_FRAME_STREAM_DATA_BLOCKED: + log_fr_stream_data_blocked(log, hd, &fr->stream_data_blocked, dir); + break; + case NGTCP2_FRAME_STREAMS_BLOCKED_BIDI: + case NGTCP2_FRAME_STREAMS_BLOCKED_UNI: + log_fr_streams_blocked(log, hd, &fr->streams_blocked, dir); + break; + case NGTCP2_FRAME_NEW_CONNECTION_ID: + log_fr_new_connection_id(log, hd, &fr->new_connection_id, dir); + break; + case NGTCP2_FRAME_STOP_SENDING: + log_fr_stop_sending(log, hd, &fr->stop_sending, dir); + break; + case NGTCP2_FRAME_PATH_CHALLENGE: + log_fr_path_challenge(log, hd, &fr->path_challenge, dir); + break; + case NGTCP2_FRAME_PATH_RESPONSE: + log_fr_path_response(log, hd, &fr->path_response, dir); + break; + case NGTCP2_FRAME_CRYPTO: + log_fr_crypto(log, hd, &fr->crypto, dir); + break; + case NGTCP2_FRAME_NEW_TOKEN: + log_fr_new_token(log, hd, &fr->new_token, dir); + break; + case NGTCP2_FRAME_RETIRE_CONNECTION_ID: + log_fr_retire_connection_id(log, hd, &fr->retire_connection_id, dir); + break; + case NGTCP2_FRAME_HANDSHAKE_DONE: + log_fr_handshake_done(log, hd, &fr->handshake_done, dir); + break; + case NGTCP2_FRAME_DATAGRAM: + case NGTCP2_FRAME_DATAGRAM_LEN: + log_fr_datagram(log, hd, &fr->datagram, dir); + break; + default: + ngtcp2_unreachable(); + } +} + +void ngtcp2_log_rx_fr(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, + const ngtcp2_frame *fr) { + if (!log->log_printf) { + return; + } + + log_fr(log, hd, fr, "rx"); +} + +void ngtcp2_log_tx_fr(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, + const ngtcp2_frame *fr) { + if (!log->log_printf) { + return; + } + + log_fr(log, hd, fr, "tx"); +} + +void ngtcp2_log_rx_vn(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, + const uint32_t *sv, size_t nsv) { + size_t i; + + if (!log->log_printf) { + return; + } + + for (i = 0; i < nsv; ++i) { + log->log_printf(log->user_data, (NGTCP2_LOG_PKT " v=0x%08x"), + NGTCP2_LOG_PKT_HD_FIELDS("rx"), sv[i]); + } +} + +void ngtcp2_log_rx_sr(ngtcp2_log *log, const ngtcp2_pkt_stateless_reset *sr) { + uint8_t buf[sizeof(sr->stateless_reset_token) * 2 + 1]; + ngtcp2_pkt_hd shd; + ngtcp2_pkt_hd *hd = &shd; + + if (!log->log_printf) { + return; + } + + memset(&shd, 0, sizeof(shd)); + + shd.type = NGTCP2_PKT_STATELESS_RESET; + + log->log_printf( + log->user_data, (NGTCP2_LOG_PKT " token=0x%s randlen=%zu"), + NGTCP2_LOG_PKT_HD_FIELDS("rx"), + (const char *)ngtcp2_encode_hex(buf, sr->stateless_reset_token, + sizeof(sr->stateless_reset_token)), + sr->randlen); +} + +void ngtcp2_log_remote_tp(ngtcp2_log *log, uint8_t exttype, + const ngtcp2_transport_params *params) { + uint8_t token[NGTCP2_STATELESS_RESET_TOKENLEN * 2 + 1]; + uint8_t addr[16 * 2 + 7 + 1]; + uint8_t cid[NGTCP2_MAX_CIDLEN * 2 + 1]; + size_t i; + const ngtcp2_sockaddr_in *sa_in; + const ngtcp2_sockaddr_in6 *sa_in6; + const uint8_t *p; + uint32_t version; + + if (!log->log_printf) { + return; + } + + if (exttype == NGTCP2_TRANSPORT_PARAMS_TYPE_ENCRYPTED_EXTENSIONS) { + if (params->stateless_reset_token_present) { + log->log_printf(log->user_data, + (NGTCP2_LOG_TP " stateless_reset_token=0x%s"), + NGTCP2_LOG_TP_HD_FIELDS, + (const char *)ngtcp2_encode_hex( + token, params->stateless_reset_token, + sizeof(params->stateless_reset_token))); + } + + if (params->preferred_address_present) { + if (params->preferred_address.ipv4_present) { + sa_in = ¶ms->preferred_address.ipv4; + + log->log_printf(log->user_data, + (NGTCP2_LOG_TP " preferred_address.ipv4_addr=%s"), + NGTCP2_LOG_TP_HD_FIELDS, + (const char *)ngtcp2_encode_ipv4( + addr, (const uint8_t *)&sa_in->sin_addr)); + log->log_printf(log->user_data, + (NGTCP2_LOG_TP " preferred_address.ipv4_port=%u"), + NGTCP2_LOG_TP_HD_FIELDS, ngtcp2_ntohs(sa_in->sin_port)); + } + + if (params->preferred_address.ipv6_present) { + sa_in6 = ¶ms->preferred_address.ipv6; + + log->log_printf(log->user_data, + (NGTCP2_LOG_TP " preferred_address.ipv6_addr=%s"), + NGTCP2_LOG_TP_HD_FIELDS, + (const char *)ngtcp2_encode_ipv6( + addr, (const uint8_t *)&sa_in6->sin6_addr)); + log->log_printf( + log->user_data, (NGTCP2_LOG_TP " preferred_address.ipv6_port=%u"), + NGTCP2_LOG_TP_HD_FIELDS, ngtcp2_ntohs(sa_in6->sin6_port)); + } + + log->log_printf(log->user_data, + (NGTCP2_LOG_TP " preferred_address.cid=0x%s"), + NGTCP2_LOG_TP_HD_FIELDS, + (const char *)ngtcp2_encode_hex( + cid, params->preferred_address.cid.data, + params->preferred_address.cid.datalen)); + log->log_printf( + log->user_data, + (NGTCP2_LOG_TP " preferred_address.stateless_reset_token=0x%s"), + NGTCP2_LOG_TP_HD_FIELDS, + (const char *)ngtcp2_encode_hex( + token, params->preferred_address.stateless_reset_token, + sizeof(params->preferred_address.stateless_reset_token))); + } + + log->log_printf( + log->user_data, + (NGTCP2_LOG_TP " original_destination_connection_id=0x%s"), + NGTCP2_LOG_TP_HD_FIELDS, + (const char *)ngtcp2_encode_hex(cid, params->original_dcid.data, + params->original_dcid.datalen)); + + if (params->retry_scid_present) { + log->log_printf( + log->user_data, (NGTCP2_LOG_TP " retry_source_connection_id=0x%s"), + NGTCP2_LOG_TP_HD_FIELDS, + (const char *)ngtcp2_encode_hex(cid, params->retry_scid.data, + params->retry_scid.datalen)); + } + } + + log->log_printf( + log->user_data, (NGTCP2_LOG_TP " initial_source_connection_id=0x%s"), + NGTCP2_LOG_TP_HD_FIELDS, + (const char *)ngtcp2_encode_hex(cid, params->initial_scid.data, + params->initial_scid.datalen)); + + log->log_printf( + log->user_data, + (NGTCP2_LOG_TP " initial_max_stream_data_bidi_local=%" PRIu64), + NGTCP2_LOG_TP_HD_FIELDS, params->initial_max_stream_data_bidi_local); + log->log_printf( + log->user_data, + (NGTCP2_LOG_TP " initial_max_stream_data_bidi_remote=%" PRIu64), + NGTCP2_LOG_TP_HD_FIELDS, params->initial_max_stream_data_bidi_remote); + log->log_printf(log->user_data, + (NGTCP2_LOG_TP " initial_max_stream_data_uni=%" PRIu64), + NGTCP2_LOG_TP_HD_FIELDS, params->initial_max_stream_data_uni); + log->log_printf(log->user_data, (NGTCP2_LOG_TP " initial_max_data=%" PRIu64), + NGTCP2_LOG_TP_HD_FIELDS, params->initial_max_data); + log->log_printf(log->user_data, + (NGTCP2_LOG_TP " initial_max_streams_bidi=%" PRIu64), + NGTCP2_LOG_TP_HD_FIELDS, params->initial_max_streams_bidi); + log->log_printf(log->user_data, + (NGTCP2_LOG_TP " initial_max_streams_uni=%" PRIu64), + NGTCP2_LOG_TP_HD_FIELDS, params->initial_max_streams_uni); + log->log_printf(log->user_data, (NGTCP2_LOG_TP " max_idle_timeout=%" PRIu64), + NGTCP2_LOG_TP_HD_FIELDS, + params->max_idle_timeout / NGTCP2_MILLISECONDS); + log->log_printf(log->user_data, + (NGTCP2_LOG_TP " max_udp_payload_size=%" PRIu64), + NGTCP2_LOG_TP_HD_FIELDS, params->max_udp_payload_size); + log->log_printf(log->user_data, + (NGTCP2_LOG_TP " ack_delay_exponent=%" PRIu64), + NGTCP2_LOG_TP_HD_FIELDS, params->ack_delay_exponent); + log->log_printf(log->user_data, (NGTCP2_LOG_TP " max_ack_delay=%" PRIu64), + NGTCP2_LOG_TP_HD_FIELDS, + params->max_ack_delay / NGTCP2_MILLISECONDS); + log->log_printf(log->user_data, + (NGTCP2_LOG_TP " active_connection_id_limit=%" PRIu64), + NGTCP2_LOG_TP_HD_FIELDS, params->active_connection_id_limit); + log->log_printf(log->user_data, + (NGTCP2_LOG_TP " disable_active_migration=%d"), + NGTCP2_LOG_TP_HD_FIELDS, params->disable_active_migration); + log->log_printf(log->user_data, + (NGTCP2_LOG_TP " max_datagram_frame_size=%" PRIu64), + NGTCP2_LOG_TP_HD_FIELDS, params->max_datagram_frame_size); + log->log_printf(log->user_data, (NGTCP2_LOG_TP " grease_quic_bit=%d"), + NGTCP2_LOG_TP_HD_FIELDS, params->grease_quic_bit); + + if (params->version_info_present) { + log->log_printf( + log->user_data, + (NGTCP2_LOG_TP " version_information.chosen_version=0x%08x"), + NGTCP2_LOG_TP_HD_FIELDS, params->version_info.chosen_version); + + assert(!(params->version_info.available_versionslen & 0x3)); + + for (i = 0, p = params->version_info.available_versions; + i < params->version_info.available_versionslen; + i += sizeof(uint32_t)) { + p = ngtcp2_get_uint32(&version, p); + + log->log_printf( + log->user_data, + (NGTCP2_LOG_TP " version_information.available_versions[%zu]=0x%08x"), + NGTCP2_LOG_TP_HD_FIELDS, i >> 2, version); + } + } +} + +void ngtcp2_log_pkt_lost(ngtcp2_log *log, int64_t pkt_num, uint8_t type, + uint8_t flags, ngtcp2_tstamp sent_ts) { + if (!log->log_printf) { + return; + } + + ngtcp2_log_info(log, NGTCP2_LOG_EVENT_RCV, + "pkn=%" PRId64 " lost type=%s sent_ts=%" PRIu64, pkt_num, + strpkttype_type_flags(type, flags), sent_ts); +} + +static void log_pkt_hd(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, + const char *dir) { + uint8_t dcid[sizeof(hd->dcid.data) * 2 + 1]; + uint8_t scid[sizeof(hd->scid.data) * 2 + 1]; + + if (!log->log_printf) { + return; + } + + if (hd->type == NGTCP2_PKT_1RTT) { + ngtcp2_log_info( + log, NGTCP2_LOG_EVENT_PKT, "%s pkn=%" PRId64 " dcid=0x%s type=%s k=%d", + dir, hd->pkt_num, + (const char *)ngtcp2_encode_hex(dcid, hd->dcid.data, hd->dcid.datalen), + strpkttype(hd), (hd->flags & NGTCP2_PKT_FLAG_KEY_PHASE) != 0); + } else { + ngtcp2_log_info( + log, NGTCP2_LOG_EVENT_PKT, + "%s pkn=%" PRId64 " dcid=0x%s scid=0x%s version=0x%08x type=%s len=%zu", + dir, hd->pkt_num, + (const char *)ngtcp2_encode_hex(dcid, hd->dcid.data, hd->dcid.datalen), + (const char *)ngtcp2_encode_hex(scid, hd->scid.data, hd->scid.datalen), + hd->version, strpkttype(hd), hd->len); + } +} + +void ngtcp2_log_rx_pkt_hd(ngtcp2_log *log, const ngtcp2_pkt_hd *hd) { + log_pkt_hd(log, hd, "rx"); +} + +void ngtcp2_log_tx_pkt_hd(ngtcp2_log *log, const ngtcp2_pkt_hd *hd) { + log_pkt_hd(log, hd, "tx"); +} + +void ngtcp2_log_info(ngtcp2_log *log, ngtcp2_log_event ev, const char *fmt, + ...) { + va_list ap; + int n; + char buf[NGTCP2_LOG_BUFLEN]; + + if (!log->log_printf) { + return; + } + + va_start(ap, fmt); + n = vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + if (n < 0 || (size_t)n >= sizeof(buf)) { + return; + } + + log->log_printf(log->user_data, (NGTCP2_LOG_HD " %s"), + timestamp_cast(log->last_ts - log->ts), log->scid, + strevent(ev), buf); +} + +void ngtcp2_log_tx_cancel(ngtcp2_log *log, const ngtcp2_pkt_hd *hd) { + ngtcp2_log_info(log, NGTCP2_LOG_EVENT_PKT, + "cancel tx pkn=%" PRId64 " type=%s", hd->pkt_num, + strpkttype(hd)); +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_log.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_log.h new file mode 100644 index 0000000..029ef1b --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_log.h @@ -0,0 +1,123 @@ +/* + * ngtcp2 + * + * Copyright (c) 2018 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_LOG_H +#define NGTCP2_LOG_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +#include "ngtcp2_pkt.h" + +typedef struct ngtcp2_log { + /* log_printf is a sink to write log. NULL means no logging + output. */ + ngtcp2_printf log_printf; + /* ts is the time point used to write time delta in the log. */ + ngtcp2_tstamp ts; + /* last_ts is the most recent time point that this object is + told. */ + ngtcp2_tstamp last_ts; + /* user_data is user-defined opaque data which is passed to + log_pritnf. */ + void *user_data; + /* scid is SCID encoded as NULL-terminated hex string. */ + uint8_t scid[NGTCP2_MAX_CIDLEN * 2 + 1]; +} ngtcp2_log; + +/** + * @enum + * + * :type:`ngtcp2_log_event` defines an event of ngtcp2 library + * internal logger. + */ +typedef enum ngtcp2_log_event { + /** + * :enum:`NGTCP2_LOG_EVENT_NONE` represents no event. + */ + NGTCP2_LOG_EVENT_NONE, + /** + * :enum:`NGTCP2_LOG_EVENT_CON` is a connection (catch-all) event + */ + NGTCP2_LOG_EVENT_CON, + /** + * :enum:`NGTCP2_LOG_EVENT_PKT` is a packet event. + */ + NGTCP2_LOG_EVENT_PKT, + /** + * :enum:`NGTCP2_LOG_EVENT_FRM` is a QUIC frame event. + */ + NGTCP2_LOG_EVENT_FRM, + /** + * :enum:`NGTCP2_LOG_EVENT_RCV` is a congestion and recovery event. + */ + NGTCP2_LOG_EVENT_RCV, + /** + * :enum:`NGTCP2_LOG_EVENT_CRY` is a crypto event. + */ + NGTCP2_LOG_EVENT_CRY, + /** + * :enum:`NGTCP2_LOG_EVENT_PTV` is a path validation event. + */ + NGTCP2_LOG_EVENT_PTV, +} ngtcp2_log_event; + +void ngtcp2_log_init(ngtcp2_log *log, const ngtcp2_cid *scid, + ngtcp2_printf log_printf, ngtcp2_tstamp ts, + void *user_data); + +void ngtcp2_log_rx_fr(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, + const ngtcp2_frame *fr); +void ngtcp2_log_tx_fr(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, + const ngtcp2_frame *fr); + +void ngtcp2_log_rx_vn(ngtcp2_log *log, const ngtcp2_pkt_hd *hd, + const uint32_t *sv, size_t nsv); + +void ngtcp2_log_rx_sr(ngtcp2_log *log, const ngtcp2_pkt_stateless_reset *sr); + +void ngtcp2_log_remote_tp(ngtcp2_log *log, uint8_t exttype, + const ngtcp2_transport_params *params); + +void ngtcp2_log_pkt_lost(ngtcp2_log *log, int64_t pkt_num, uint8_t type, + uint8_t flags, ngtcp2_tstamp sent_ts); + +void ngtcp2_log_rx_pkt_hd(ngtcp2_log *log, const ngtcp2_pkt_hd *hd); + +void ngtcp2_log_tx_pkt_hd(ngtcp2_log *log, const ngtcp2_pkt_hd *hd); + +void ngtcp2_log_tx_cancel(ngtcp2_log *log, const ngtcp2_pkt_hd *hd); + +/** + * @function + * + * `ngtcp2_log_info` writes info level log. + */ +void ngtcp2_log_info(ngtcp2_log *log, ngtcp2_log_event ev, const char *fmt, + ...); + +#endif /* NGTCP2_LOG_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_macro.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_macro.h new file mode 100644 index 0000000..28d3461 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_macro.h @@ -0,0 +1,58 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_MACRO_H +#define NGTCP2_MACRO_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <stddef.h> + +#include <ngtcp2/ngtcp2.h> + +#define ngtcp2_min(A, B) ((A) < (B) ? (A) : (B)) +#define ngtcp2_max(A, B) ((A) > (B) ? (A) : (B)) + +#define ngtcp2_struct_of(ptr, type, member) \ + ((type *)(void *)((char *)(ptr)-offsetof(type, member))) + +/* ngtcp2_list_insert inserts |T| before |*PD|. The contract is that + this is singly linked list, and the next element is pointed by next + field of the previous element. |PD| must be a pointer to the + pointer to the next field of the previous element of |*PD|: if C is + the previous element of |PD|, PD = &C->next. */ +#define ngtcp2_list_insert(T, PD) \ + do { \ + (T)->next = *(PD); \ + *(PD) = (T); \ + } while (0) + +/* + * ngtcp2_arraylen returns the number of elements in array |A|. + */ +#define ngtcp2_arraylen(A) (sizeof(A) / sizeof(A[0])) + +#endif /* NGTCP2_MACRO_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_map.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_map.c new file mode 100644 index 0000000..12bc6e8 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_map.c @@ -0,0 +1,336 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * Copyright (c) 2012 nghttp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_map.h" + +#include <string.h> +#include <assert.h> +#include <stdio.h> + +#include "ngtcp2_conv.h" + +#define NGTCP2_INITIAL_TABLE_LENBITS 4 + +void ngtcp2_map_init(ngtcp2_map *map, const ngtcp2_mem *mem) { + map->mem = mem; + map->tablelen = 0; + map->tablelenbits = 0; + map->table = NULL; + map->size = 0; +} + +void ngtcp2_map_free(ngtcp2_map *map) { + if (!map) { + return; + } + + ngtcp2_mem_free(map->mem, map->table); +} + +void ngtcp2_map_each_free(ngtcp2_map *map, int (*func)(void *data, void *ptr), + void *ptr) { + uint32_t i; + ngtcp2_map_bucket *bkt; + + for (i = 0; i < map->tablelen; ++i) { + bkt = &map->table[i]; + + if (bkt->data == NULL) { + continue; + } + + func(bkt->data, ptr); + } +} + +int ngtcp2_map_each(ngtcp2_map *map, int (*func)(void *data, void *ptr), + void *ptr) { + int rv; + uint32_t i; + ngtcp2_map_bucket *bkt; + + if (map->size == 0) { + return 0; + } + + for (i = 0; i < map->tablelen; ++i) { + bkt = &map->table[i]; + + if (bkt->data == NULL) { + continue; + } + + rv = func(bkt->data, ptr); + if (rv != 0) { + return rv; + } + } + + return 0; +} + +static uint32_t hash(ngtcp2_map_key_type key) { + return (uint32_t)((key * 11400714819323198485llu) >> 32); +} + +static size_t h2idx(uint32_t hash, uint32_t bits) { + return hash >> (32 - bits); +} + +static size_t distance(uint32_t tablelen, uint32_t tablelenbits, + ngtcp2_map_bucket *bkt, size_t idx) { + return (idx - h2idx(bkt->hash, tablelenbits)) & (tablelen - 1); +} + +static void map_bucket_swap(ngtcp2_map_bucket *bkt, uint32_t *phash, + ngtcp2_map_key_type *pkey, void **pdata) { + uint32_t h = bkt->hash; + ngtcp2_map_key_type key = bkt->key; + void *data = bkt->data; + + bkt->hash = *phash; + bkt->key = *pkey; + bkt->data = *pdata; + + *phash = h; + *pkey = key; + *pdata = data; +} + +static void map_bucket_set_data(ngtcp2_map_bucket *bkt, uint32_t hash, + ngtcp2_map_key_type key, void *data) { + bkt->hash = hash; + bkt->key = key; + bkt->data = data; +} + +void ngtcp2_map_print_distance(ngtcp2_map *map) { + uint32_t i; + size_t idx; + ngtcp2_map_bucket *bkt; + + for (i = 0; i < map->tablelen; ++i) { + bkt = &map->table[i]; + + if (bkt->data == NULL) { + fprintf(stderr, "@%u <EMPTY>\n", i); + continue; + } + + idx = h2idx(bkt->hash, map->tablelenbits); + fprintf(stderr, "@%u hash=%08x key=%" PRIu64 " base=%zu distance=%zu\n", i, + bkt->hash, bkt->key, idx, + distance(map->tablelen, map->tablelenbits, bkt, idx)); + } +} + +static int insert(ngtcp2_map_bucket *table, uint32_t tablelen, + uint32_t tablelenbits, uint32_t hash, ngtcp2_map_key_type key, + void *data) { + size_t idx = h2idx(hash, tablelenbits); + size_t d = 0, dd; + ngtcp2_map_bucket *bkt; + + for (;;) { + bkt = &table[idx]; + + if (bkt->data == NULL) { + map_bucket_set_data(bkt, hash, key, data); + return 0; + } + + dd = distance(tablelen, tablelenbits, bkt, idx); + if (d > dd) { + map_bucket_swap(bkt, &hash, &key, &data); + d = dd; + } else if (bkt->key == key) { + /* TODO This check is just a waste after first swap or if this + function is called from map_resize. That said, there is no + difference with or without this conditional in performance + wise. */ + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + ++d; + idx = (idx + 1) & (tablelen - 1); + } +} + +/* new_tablelen must be power of 2 and new_tablelen == (1 << + new_tablelenbits) must hold. */ +static int map_resize(ngtcp2_map *map, uint32_t new_tablelen, + uint32_t new_tablelenbits) { + uint32_t i; + ngtcp2_map_bucket *new_table; + ngtcp2_map_bucket *bkt; + int rv; + (void)rv; + + new_table = + ngtcp2_mem_calloc(map->mem, new_tablelen, sizeof(ngtcp2_map_bucket)); + if (new_table == NULL) { + return NGTCP2_ERR_NOMEM; + } + + for (i = 0; i < map->tablelen; ++i) { + bkt = &map->table[i]; + if (bkt->data == NULL) { + continue; + } + rv = insert(new_table, new_tablelen, new_tablelenbits, bkt->hash, bkt->key, + bkt->data); + + assert(0 == rv); + } + + ngtcp2_mem_free(map->mem, map->table); + map->tablelen = new_tablelen; + map->tablelenbits = new_tablelenbits; + map->table = new_table; + + return 0; +} + +int ngtcp2_map_insert(ngtcp2_map *map, ngtcp2_map_key_type key, void *data) { + int rv; + + assert(data); + + /* Load factor is 0.75 */ + if ((map->size + 1) * 4 > map->tablelen * 3) { + if (map->tablelen) { + rv = map_resize(map, map->tablelen * 2, map->tablelenbits + 1); + if (rv != 0) { + return rv; + } + } else { + rv = map_resize(map, 1 << NGTCP2_INITIAL_TABLE_LENBITS, + NGTCP2_INITIAL_TABLE_LENBITS); + if (rv != 0) { + return rv; + } + } + } + + rv = insert(map->table, map->tablelen, map->tablelenbits, hash(key), key, + data); + if (rv != 0) { + return rv; + } + ++map->size; + return 0; +} + +void *ngtcp2_map_find(ngtcp2_map *map, ngtcp2_map_key_type key) { + uint32_t h; + size_t idx; + ngtcp2_map_bucket *bkt; + size_t d = 0; + + if (map->size == 0) { + return NULL; + } + + h = hash(key); + idx = h2idx(h, map->tablelenbits); + + for (;;) { + bkt = &map->table[idx]; + + if (bkt->data == NULL || + d > distance(map->tablelen, map->tablelenbits, bkt, idx)) { + return NULL; + } + + if (bkt->key == key) { + return bkt->data; + } + + ++d; + idx = (idx + 1) & (map->tablelen - 1); + } +} + +int ngtcp2_map_remove(ngtcp2_map *map, ngtcp2_map_key_type key) { + uint32_t h; + size_t idx, didx; + ngtcp2_map_bucket *bkt; + size_t d = 0; + + if (map->size == 0) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + h = hash(key); + idx = h2idx(h, map->tablelenbits); + + for (;;) { + bkt = &map->table[idx]; + + if (bkt->data == NULL || + d > distance(map->tablelen, map->tablelenbits, bkt, idx)) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + if (bkt->key == key) { + map_bucket_set_data(bkt, 0, 0, NULL); + + didx = idx; + idx = (idx + 1) & (map->tablelen - 1); + + for (;;) { + bkt = &map->table[idx]; + if (bkt->data == NULL || + distance(map->tablelen, map->tablelenbits, bkt, idx) == 0) { + break; + } + + map->table[didx] = *bkt; + map_bucket_set_data(bkt, 0, 0, NULL); + didx = idx; + + idx = (idx + 1) & (map->tablelen - 1); + } + + --map->size; + + return 0; + } + + ++d; + idx = (idx + 1) & (map->tablelen - 1); + } +} + +void ngtcp2_map_clear(ngtcp2_map *map) { + if (map->tablelen == 0) { + return; + } + + memset(map->table, 0, sizeof(*map->table) * map->tablelen); + map->size = 0; +} + +size_t ngtcp2_map_size(ngtcp2_map *map) { return map->size; } diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_map.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_map.h new file mode 100644 index 0000000..a64344a --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_map.h @@ -0,0 +1,136 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * Copyright (c) 2012 nghttp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_MAP_H +#define NGTCP2_MAP_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +#include "ngtcp2_mem.h" + +/* Implementation of unordered map */ + +typedef uint64_t ngtcp2_map_key_type; + +typedef struct ngtcp2_map_bucket { + uint32_t hash; + ngtcp2_map_key_type key; + void *data; +} ngtcp2_map_bucket; + +typedef struct ngtcp2_map { + ngtcp2_map_bucket *table; + const ngtcp2_mem *mem; + size_t size; + uint32_t tablelen; + uint32_t tablelenbits; +} ngtcp2_map; + +/* + * Initializes the map |map|. + */ +void ngtcp2_map_init(ngtcp2_map *map, const ngtcp2_mem *mem); + +/* + * Deallocates any resources allocated for |map|. The stored entries + * are not freed by this function. Use ngtcp2_map_each_free() to free + * each entries. + */ +void ngtcp2_map_free(ngtcp2_map *map); + +/* + * Deallocates each entries using |func| function and any resources + * allocated for |map|. The |func| function is responsible for freeing + * given the |data| object. The |ptr| will be passed to the |func| as + * send argument. The return value of the |func| will be ignored. + */ +void ngtcp2_map_each_free(ngtcp2_map *map, int (*func)(void *data, void *ptr), + void *ptr); + +/* + * Inserts the new |data| with the |key| to the map |map|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_INVALID_ARGUMENT + * The item associated by |key| already exists. + * NGTCP2_ERR_NOMEM + * Out of memory + */ +int ngtcp2_map_insert(ngtcp2_map *map, ngtcp2_map_key_type key, void *data); + +/* + * Returns the data associated by the key |key|. If there is no such + * data, this function returns NULL. + */ +void *ngtcp2_map_find(ngtcp2_map *map, ngtcp2_map_key_type key); + +/* + * Removes the data associated by the key |key| from the |map|. The + * removed data is not freed by this function. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_INVALID_ARGUMENT + * The data associated by |key| does not exist. + */ +int ngtcp2_map_remove(ngtcp2_map *map, ngtcp2_map_key_type key); + +/* + * Removes all entries from |map|. + */ +void ngtcp2_map_clear(ngtcp2_map *map); + +/* + * Returns the number of items stored in the map |map|. + */ +size_t ngtcp2_map_size(ngtcp2_map *map); + +/* + * Applies the function |func| to each data in the |map| with the + * optional user supplied pointer |ptr|. + * + * If the |func| returns 0, this function calls the |func| with the + * next data. If the |func| returns nonzero, it will not call the + * |func| for further entries and return the return value of the + * |func| immediately. Thus, this function returns 0 if all the + * invocations of the |func| return 0, or nonzero value which the last + * invocation of |func| returns. + * + * Don't use this function to free each data. Use + * ngtcp2_map_each_free() instead. + */ +int ngtcp2_map_each(ngtcp2_map *map, int (*func)(void *data, void *ptr), + void *ptr); + +void ngtcp2_map_print_distance(ngtcp2_map *map); + +#endif /* NGTCP2_MAP_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_mem.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_mem.c new file mode 100644 index 0000000..bcce0b5 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_mem.c @@ -0,0 +1,113 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * Copyright (c) 2014 nghttp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_mem.h" + +#include <stdio.h> + +static void *default_malloc(size_t size, void *user_data) { + (void)user_data; + + return malloc(size); +} + +static void default_free(void *ptr, void *user_data) { + (void)user_data; + + free(ptr); +} + +static void *default_calloc(size_t nmemb, size_t size, void *user_data) { + (void)user_data; + + return calloc(nmemb, size); +} + +static void *default_realloc(void *ptr, size_t size, void *user_data) { + (void)user_data; + + return realloc(ptr, size); +} + +static const ngtcp2_mem mem_default = {NULL, default_malloc, default_free, + default_calloc, default_realloc}; + +const ngtcp2_mem *ngtcp2_mem_default(void) { return &mem_default; } + +#ifndef MEMDEBUG +void *ngtcp2_mem_malloc(const ngtcp2_mem *mem, size_t size) { + return mem->malloc(size, mem->user_data); +} + +void ngtcp2_mem_free(const ngtcp2_mem *mem, void *ptr) { + mem->free(ptr, mem->user_data); +} + +void *ngtcp2_mem_calloc(const ngtcp2_mem *mem, size_t nmemb, size_t size) { + return mem->calloc(nmemb, size, mem->user_data); +} + +void *ngtcp2_mem_realloc(const ngtcp2_mem *mem, void *ptr, size_t size) { + return mem->realloc(ptr, size, mem->user_data); +} +#else /* MEMDEBUG */ +void *ngtcp2_mem_malloc_debug(const ngtcp2_mem *mem, size_t size, + const char *func, const char *file, size_t line) { + void *nptr = mem->malloc(size, mem->user_data); + + fprintf(stderr, "malloc %p size=%zu in %s at %s:%zu\n", nptr, size, func, + file, line); + + return nptr; +} + +void ngtcp2_mem_free_debug(const ngtcp2_mem *mem, void *ptr, const char *func, + const char *file, size_t line) { + fprintf(stderr, "free ptr=%p in %s at %s:%zu\n", ptr, func, file, line); + + mem->free(ptr, mem->user_data); +} + +void *ngtcp2_mem_calloc_debug(const ngtcp2_mem *mem, size_t nmemb, size_t size, + const char *func, const char *file, size_t line) { + void *nptr = mem->calloc(nmemb, size, mem->user_data); + + fprintf(stderr, "calloc %p nmemb=%zu size=%zu in %s at %s:%zu\n", nptr, nmemb, + size, func, file, line); + + return nptr; +} + +void *ngtcp2_mem_realloc_debug(const ngtcp2_mem *mem, void *ptr, size_t size, + const char *func, const char *file, + size_t line) { + void *nptr = mem->realloc(ptr, size, mem->user_data); + + fprintf(stderr, "realloc %p ptr=%p size=%zu in %s at %s:%zu\n", nptr, ptr, + size, func, file, line); + + return nptr; +} +#endif /* MEMDEBUG */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_mem.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_mem.h new file mode 100644 index 0000000..c99b6c5 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_mem.h @@ -0,0 +1,72 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * Copyright (c) 2014 nghttp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_MEM_H +#define NGTCP2_MEM_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +/* Convenient wrapper functions to call allocator function in + |mem|. */ +#ifndef MEMDEBUG +void *ngtcp2_mem_malloc(const ngtcp2_mem *mem, size_t size); + +void ngtcp2_mem_free(const ngtcp2_mem *mem, void *ptr); + +void *ngtcp2_mem_calloc(const ngtcp2_mem *mem, size_t nmemb, size_t size); + +void *ngtcp2_mem_realloc(const ngtcp2_mem *mem, void *ptr, size_t size); +#else /* MEMDEBUG */ +void *ngtcp2_mem_malloc_debug(const ngtcp2_mem *mem, size_t size, + const char *func, const char *file, size_t line); + +# define ngtcp2_mem_malloc(MEM, SIZE) \ + ngtcp2_mem_malloc_debug((MEM), (SIZE), __func__, __FILE__, __LINE__) + +void ngtcp2_mem_free_debug(const ngtcp2_mem *mem, void *ptr, const char *func, + const char *file, size_t line); + +# define ngtcp2_mem_free(MEM, PTR) \ + ngtcp2_mem_free_debug((MEM), (PTR), __func__, __FILE__, __LINE__) + +void *ngtcp2_mem_calloc_debug(const ngtcp2_mem *mem, size_t nmemb, size_t size, + const char *func, const char *file, size_t line); + +# define ngtcp2_mem_calloc(MEM, NMEMB, SIZE) \ + ngtcp2_mem_calloc_debug((MEM), (NMEMB), (SIZE), __func__, __FILE__, \ + __LINE__) + +void *ngtcp2_mem_realloc_debug(const ngtcp2_mem *mem, void *ptr, size_t size, + const char *func, const char *file, size_t line); + +# define ngtcp2_mem_realloc(MEM, PTR, SIZE) \ + ngtcp2_mem_realloc_debug((MEM), (PTR), (SIZE), __func__, __FILE__, __LINE__) +#endif /* MEMDEBUG */ + +#endif /* NGTCP2_MEM_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_net.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_net.h new file mode 100644 index 0000000..cd73d2b --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_net.h @@ -0,0 +1,136 @@ +/* + * ngtcp2 + * + * Copyright (c) 2022 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_NET_H +#define NGTCP2_NET_H + +/* This header file is explicitly allowed to be shared with + ngtcp2_crypto library. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#ifdef HAVE_ARPA_INET_H +# include <arpa/inet.h> +#endif /* HAVE_ARPA_INET_H */ + +#ifdef HAVE_NETINET_IN_H +# include <netinet/in.h> +#endif /* HAVE_NETINET_IN_H */ + +#ifdef HAVE_BYTESWAP_H +# include <byteswap.h> +#endif /* HAVE_BYTESWAP_H */ + +#ifdef HAVE_ENDIAN_H +# include <endian.h> +#endif /* HAVE_ENDIAN_H */ + +#ifdef HAVE_SYS_ENDIAN_H +# include <sys/endian.h> +#endif /* HAVE_SYS_ENDIAN_H */ + +#include <ngtcp2/ngtcp2.h> + +#if defined(HAVE_BSWAP_64) || \ + (defined(HAVE_DECL_BSWAP_64) && HAVE_DECL_BSWAP_64 > 0) +# define ngtcp2_bswap64 bswap_64 +#else /* !HAVE_BSWAP_64 */ +# define ngtcp2_bswap64(N) \ + ((uint64_t)(ngtcp2_ntohl((uint32_t)(N))) << 32 | \ + ngtcp2_ntohl((uint32_t)((N) >> 32))) +#endif /* !HAVE_BSWAP_64 */ + +#if defined(HAVE_BE64TOH) || \ + (defined(HAVE_DECL_BE64TOH) && HAVE_DECL_BE64TOH > 0) +# define ngtcp2_ntohl64(N) be64toh(N) +# define ngtcp2_htonl64(N) htobe64(N) +#else /* !HAVE_BE64TOH */ +# if defined(WORDS_BIGENDIAN) +# define ngtcp2_ntohl64(N) (N) +# define ngtcp2_htonl64(N) (N) +# else /* !WORDS_BIGENDIAN */ +# define ngtcp2_ntohl64(N) ngtcp2_bswap64(N) +# define ngtcp2_htonl64(N) ngtcp2_bswap64(N) +# endif /* !WORDS_BIGENDIAN */ +#endif /* !HAVE_BE64TOH */ + +#if defined(WIN32) +/* Windows requires ws2_32 library for ntonl family functions. We + define inline functions for those function so that we don't have + dependeny on that lib. */ + +# ifdef _MSC_VER +# define STIN static __inline +# else +# define STIN static inline +# endif + +STIN uint32_t ngtcp2_htonl(uint32_t hostlong) { + uint32_t res; + unsigned char *p = (unsigned char *)&res; + *p++ = (unsigned char)(hostlong >> 24); + *p++ = (hostlong >> 16) & 0xffu; + *p++ = (hostlong >> 8) & 0xffu; + *p = hostlong & 0xffu; + return res; +} + +STIN uint16_t ngtcp2_htons(uint16_t hostshort) { + uint16_t res; + unsigned char *p = (unsigned char *)&res; + *p++ = (unsigned char)(hostshort >> 8); + *p = hostshort & 0xffu; + return res; +} + +STIN uint32_t ngtcp2_ntohl(uint32_t netlong) { + uint32_t res; + unsigned char *p = (unsigned char *)&netlong; + res = (uint32_t)(*p++ << 24); + res += (uint32_t)(*p++ << 16); + res += (uint32_t)(*p++ << 8); + res += *p; + return res; +} + +STIN uint16_t ngtcp2_ntohs(uint16_t netshort) { + uint16_t res; + unsigned char *p = (unsigned char *)&netshort; + res = (uint16_t)(*p++ << 8); + res += *p; + return res; +} + +#else /* !WIN32 */ + +# define ngtcp2_htonl htonl +# define ngtcp2_htons htons +# define ngtcp2_ntohl ntohl +# define ngtcp2_ntohs ntohs + +#endif /* !WIN32 */ + +#endif /* NGTCP2_NET_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_objalloc.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_objalloc.c new file mode 100644 index 0000000..8b06cdd --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_objalloc.c @@ -0,0 +1,40 @@ +/* + * ngtcp2 + * + * Copyright (c) 2022 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_objalloc.h" + +void ngtcp2_objalloc_init(ngtcp2_objalloc *objalloc, size_t blklen, + const ngtcp2_mem *mem) { + ngtcp2_balloc_init(&objalloc->balloc, blklen, mem); + ngtcp2_opl_init(&objalloc->opl); +} + +void ngtcp2_objalloc_free(ngtcp2_objalloc *objalloc) { + ngtcp2_balloc_free(&objalloc->balloc); +} + +void ngtcp2_objalloc_clear(ngtcp2_objalloc *objalloc) { + ngtcp2_opl_clear(&objalloc->opl); + ngtcp2_balloc_clear(&objalloc->balloc); +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_objalloc.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_objalloc.h new file mode 100644 index 0000000..f1bbd3a --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_objalloc.h @@ -0,0 +1,140 @@ +/* + * ngtcp2 + * + * Copyright (c) 2022 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_OBJALLOC_H +#define NGTCP2_OBJALLOC_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +#include "ngtcp2_balloc.h" +#include "ngtcp2_opl.h" +#include "ngtcp2_macro.h" +#include "ngtcp2_mem.h" + +/* + * ngtcp2_objalloc combines ngtcp2_balloc and ngtcp2_opl, and provides + * an object pool with the custom allocator to reduce the allocation + * and deallocation overheads for small objects. + */ +typedef struct ngtcp2_objalloc { + ngtcp2_balloc balloc; + ngtcp2_opl opl; +} ngtcp2_objalloc; + +/* + * ngtcp2_objalloc_init initializes |objalloc|. |blklen| is directly + * passed to ngtcp2_balloc_init. + */ +void ngtcp2_objalloc_init(ngtcp2_objalloc *objalloc, size_t blklen, + const ngtcp2_mem *mem); + +/* + * ngtcp2_objalloc_free releases all allocated resources. + */ +void ngtcp2_objalloc_free(ngtcp2_objalloc *objalloc); + +/* + * ngtcp2_objalloc_clear releases all allocated resources and + * initializes its state. + */ +void ngtcp2_objalloc_clear(ngtcp2_objalloc *objalloc); + +#ifndef NOMEMPOOL +# define ngtcp2_objalloc_def(NAME, TYPE, OPLENTFIELD) \ + inline static void ngtcp2_objalloc_##NAME##_init( \ + ngtcp2_objalloc *objalloc, size_t nmemb, const ngtcp2_mem *mem) { \ + ngtcp2_objalloc_init( \ + objalloc, ((sizeof(TYPE) + 0xfu) & ~(uintptr_t)0xfu) * nmemb, mem); \ + } \ + \ + inline static TYPE *ngtcp2_objalloc_##NAME##_get( \ + ngtcp2_objalloc *objalloc) { \ + ngtcp2_opl_entry *oplent = ngtcp2_opl_pop(&objalloc->opl); \ + TYPE *obj; \ + int rv; \ + \ + if (!oplent) { \ + rv = \ + ngtcp2_balloc_get(&objalloc->balloc, (void **)&obj, sizeof(TYPE)); \ + if (rv != 0) { \ + return NULL; \ + } \ + \ + return obj; \ + } \ + \ + return ngtcp2_struct_of(oplent, TYPE, OPLENTFIELD); \ + } \ + \ + inline static TYPE *ngtcp2_objalloc_##NAME##_len_get( \ + ngtcp2_objalloc *objalloc, size_t len) { \ + ngtcp2_opl_entry *oplent = ngtcp2_opl_pop(&objalloc->opl); \ + TYPE *obj; \ + int rv; \ + \ + if (!oplent) { \ + rv = ngtcp2_balloc_get(&objalloc->balloc, (void **)&obj, len); \ + if (rv != 0) { \ + return NULL; \ + } \ + \ + return obj; \ + } \ + \ + return ngtcp2_struct_of(oplent, TYPE, OPLENTFIELD); \ + } \ + \ + inline static void ngtcp2_objalloc_##NAME##_release( \ + ngtcp2_objalloc *objalloc, TYPE *obj) { \ + ngtcp2_opl_push(&objalloc->opl, &obj->OPLENTFIELD); \ + } +#else /* NOMEMPOOL */ +# define ngtcp2_objalloc_def(NAME, TYPE, OPLENTFIELD) \ + inline static void ngtcp2_objalloc_##NAME##_init( \ + ngtcp2_objalloc *objalloc, size_t nmemb, const ngtcp2_mem *mem) { \ + ngtcp2_objalloc_init( \ + objalloc, ((sizeof(TYPE) + 0xfu) & ~(uintptr_t)0xfu) * nmemb, mem); \ + } \ + \ + inline static TYPE *ngtcp2_objalloc_##NAME##_get( \ + ngtcp2_objalloc *objalloc) { \ + return ngtcp2_mem_malloc(objalloc->balloc.mem, sizeof(TYPE)); \ + } \ + \ + inline static TYPE *ngtcp2_objalloc_##NAME##_len_get( \ + ngtcp2_objalloc *objalloc, size_t len) { \ + return ngtcp2_mem_malloc(objalloc->balloc.mem, len); \ + } \ + \ + inline static void ngtcp2_objalloc_##NAME##_release( \ + ngtcp2_objalloc *objalloc, TYPE *obj) { \ + ngtcp2_mem_free(objalloc->balloc.mem, obj); \ + } +#endif /* NOMEMPOOL */ + +#endif /* NGTCP2_OBJALLOC_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_opl.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_opl.c new file mode 100644 index 0000000..a29361c --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_opl.c @@ -0,0 +1,46 @@ +/* + * ngtcp2 + * + * Copyright (c) 2022 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_opl.h" + +void ngtcp2_opl_init(ngtcp2_opl *opl) { opl->head = NULL; } + +void ngtcp2_opl_push(ngtcp2_opl *opl, ngtcp2_opl_entry *ent) { + ent->next = opl->head; + opl->head = ent; +} + +ngtcp2_opl_entry *ngtcp2_opl_pop(ngtcp2_opl *opl) { + ngtcp2_opl_entry *ent = opl->head; + + if (!ent) { + return NULL; + } + + opl->head = ent->next; + + return ent; +} + +void ngtcp2_opl_clear(ngtcp2_opl *opl) { opl->head = NULL; } diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_opl.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_opl.h new file mode 100644 index 0000000..714aa36 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_opl.h @@ -0,0 +1,65 @@ +/* + * ngtcp2 + * + * Copyright (c) 2022 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_OPL_H +#define NGTCP2_OPL_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +typedef struct ngtcp2_opl_entry ngtcp2_opl_entry; + +struct ngtcp2_opl_entry { + ngtcp2_opl_entry *next; +}; + +/* + * ngtcp2_opl is an object memory pool. + */ +typedef struct ngtcp2_opl { + ngtcp2_opl_entry *head; +} ngtcp2_opl; + +/* + * ngtcp2_opl_init initializes |opl|. + */ +void ngtcp2_opl_init(ngtcp2_opl *opl); + +/* + * ngtcp2_opl_push inserts |ent| to |opl| head. + */ +void ngtcp2_opl_push(ngtcp2_opl *opl, ngtcp2_opl_entry *ent); + +/* + * ngtcp2_opl_pop removes the first ngtcp2_opl_entry from |opl| and + * returns it. If |opl| does not have any entry, it returns NULL. + */ +ngtcp2_opl_entry *ngtcp2_opl_pop(ngtcp2_opl *opl); + +void ngtcp2_opl_clear(ngtcp2_opl *opl); + +#endif /* NGTCP2_OPL_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_path.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_path.c new file mode 100644 index 0000000..8323873 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_path.c @@ -0,0 +1,77 @@ +/* + * ngtcp2 + * + * Copyright (c) 2019 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_path.h" + +#include <string.h> + +#include "ngtcp2_addr.h" + +void ngtcp2_path_init(ngtcp2_path *path, const ngtcp2_addr *local, + const ngtcp2_addr *remote) { + path->local = *local; + path->remote = *remote; +} + +void ngtcp2_path_copy(ngtcp2_path *dest, const ngtcp2_path *src) { + ngtcp2_addr_copy(&dest->local, &src->local); + ngtcp2_addr_copy(&dest->remote, &src->remote); + dest->user_data = src->user_data; +} + +int ngtcp2_path_eq(const ngtcp2_path *a, const ngtcp2_path *b) { + return ngtcp2_addr_eq(&a->local, &b->local) && + ngtcp2_addr_eq(&a->remote, &b->remote); +} + +void ngtcp2_path_storage_init(ngtcp2_path_storage *ps, + const ngtcp2_sockaddr *local_addr, + ngtcp2_socklen local_addrlen, + const ngtcp2_sockaddr *remote_addr, + ngtcp2_socklen remote_addrlen, void *user_data) { + ngtcp2_addr_init(&ps->path.local, (const ngtcp2_sockaddr *)&ps->local_addrbuf, + 0); + ngtcp2_addr_init(&ps->path.remote, + (const ngtcp2_sockaddr *)&ps->remote_addrbuf, 0); + + ngtcp2_addr_copy_byte(&ps->path.local, local_addr, local_addrlen); + ngtcp2_addr_copy_byte(&ps->path.remote, remote_addr, remote_addrlen); + + ps->path.user_data = user_data; +} + +void ngtcp2_path_storage_init2(ngtcp2_path_storage *ps, + const ngtcp2_path *path) { + ngtcp2_path_storage_init(ps, path->local.addr, path->local.addrlen, + path->remote.addr, path->remote.addrlen, + path->user_data); +} + +void ngtcp2_path_storage_zero(ngtcp2_path_storage *ps) { + ngtcp2_addr_init(&ps->path.local, (const ngtcp2_sockaddr *)&ps->local_addrbuf, + 0); + ngtcp2_addr_init(&ps->path.remote, + (const ngtcp2_sockaddr *)&ps->remote_addrbuf, 0); + ps->path.user_data = NULL; +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_path.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_path.h new file mode 100644 index 0000000..0c360e9 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_path.h @@ -0,0 +1,49 @@ +/* + * ngtcp2 + * + * Copyright (c) 2019 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_PATH_H +#define NGTCP2_PATH_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +/* + * ngtcp2_path_init initializes |path| with the given addresses. Note + * that the buffer pointed by local->addr and remote->addr are not + * copied. Their pointer values are assigned instead. + */ +void ngtcp2_path_init(ngtcp2_path *path, const ngtcp2_addr *local, + const ngtcp2_addr *remote); + +/* + * ngtcp2_path_storage_init2 initializes |ps| using |path| as initial + * data. + */ +void ngtcp2_path_storage_init2(ngtcp2_path_storage *ps, + const ngtcp2_path *path); + +#endif /* NGTCP2_PATH_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pkt.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pkt.c new file mode 100644 index 0000000..7ca63b3 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pkt.c @@ -0,0 +1,2527 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_pkt.h" + +#include <assert.h> +#include <string.h> + +#include "ngtcp2_conv.h" +#include "ngtcp2_str.h" +#include "ngtcp2_macro.h" +#include "ngtcp2_cid.h" +#include "ngtcp2_mem.h" +#include "ngtcp2_vec.h" +#include "ngtcp2_unreachable.h" +#include "ngtcp2_str.h" + +int ngtcp2_pkt_chain_new(ngtcp2_pkt_chain **ppc, const ngtcp2_path *path, + const ngtcp2_pkt_info *pi, const uint8_t *pkt, + size_t pktlen, size_t dgramlen, ngtcp2_tstamp ts, + const ngtcp2_mem *mem) { + *ppc = ngtcp2_mem_malloc(mem, sizeof(ngtcp2_pkt_chain) + pktlen); + if (*ppc == NULL) { + return NGTCP2_ERR_NOMEM; + } + + ngtcp2_path_storage_init2(&(*ppc)->path, path); + (*ppc)->pi = *pi; + (*ppc)->next = NULL; + (*ppc)->pkt = (uint8_t *)(*ppc) + sizeof(ngtcp2_pkt_chain); + (*ppc)->pktlen = pktlen; + (*ppc)->dgramlen = dgramlen; + (*ppc)->ts = ts; + + memcpy((*ppc)->pkt, pkt, pktlen); + + return 0; +} + +void ngtcp2_pkt_chain_del(ngtcp2_pkt_chain *pc, const ngtcp2_mem *mem) { + ngtcp2_mem_free(mem, pc); +} + +int ngtcp2_pkt_decode_version_cid(ngtcp2_version_cid *dest, const uint8_t *data, + size_t datalen, size_t short_dcidlen) { + size_t len; + uint32_t version; + size_t dcidlen, scidlen; + int supported_version; + + assert(datalen); + + if (data[0] & NGTCP2_HEADER_FORM_BIT) { + /* 1 byte (Header Form, Fixed Bit, Long Packet Type, Type-Specific bits) + * 4 bytes Version + * 1 byte DCID Length + * 1 byte SCID Length + */ + len = 1 + 4 + 1 + 1; + if (datalen < len) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + dcidlen = data[5]; + len += dcidlen; + if (datalen < len) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + scidlen = data[5 + 1 + dcidlen]; + len += scidlen; + if (datalen < len) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + ngtcp2_get_uint32(&version, &data[1]); + + supported_version = ngtcp2_is_supported_version(version); + + if (supported_version && + (dcidlen > NGTCP2_MAX_CIDLEN || scidlen > NGTCP2_MAX_CIDLEN)) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + if (version && !supported_version && + datalen < NGTCP2_MAX_UDP_PAYLOAD_SIZE) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + dest->version = version; + dest->dcid = &data[6]; + dest->dcidlen = dcidlen; + dest->scid = &data[6 + dcidlen + 1]; + dest->scidlen = scidlen; + + if (!version) { + /* VN */ + return 0; + } + + if (!supported_version) { + return NGTCP2_ERR_VERSION_NEGOTIATION; + } + return 0; + } + + assert(short_dcidlen <= NGTCP2_MAX_CIDLEN); + + len = 1 + short_dcidlen; + if (datalen < len) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + dest->version = 0; + dest->dcid = &data[1]; + dest->dcidlen = short_dcidlen; + dest->scid = NULL; + dest->scidlen = 0; + + return 0; +} + +void ngtcp2_pkt_hd_init(ngtcp2_pkt_hd *hd, uint8_t flags, uint8_t type, + const ngtcp2_cid *dcid, const ngtcp2_cid *scid, + int64_t pkt_num, size_t pkt_numlen, uint32_t version, + size_t len) { + hd->flags = flags; + hd->type = type; + if (dcid) { + hd->dcid = *dcid; + } else { + ngtcp2_cid_zero(&hd->dcid); + } + if (scid) { + hd->scid = *scid; + } else { + ngtcp2_cid_zero(&hd->scid); + } + hd->pkt_num = pkt_num; + hd->token = NULL; + hd->tokenlen = 0; + hd->pkt_numlen = pkt_numlen; + hd->version = version; + hd->len = len; +} + +static int has_mask(uint8_t b, uint8_t mask) { return (b & mask) == mask; } + +ngtcp2_ssize ngtcp2_pkt_decode_hd_long(ngtcp2_pkt_hd *dest, const uint8_t *pkt, + size_t pktlen) { + uint8_t type; + uint32_t version; + size_t dcil, scil; + const uint8_t *p; + size_t len = 0; + size_t n; + size_t ntokenlen = 0; + const uint8_t *token = NULL; + size_t tokenlen = 0; + uint64_t vi; + uint8_t flags = NGTCP2_PKT_FLAG_LONG_FORM; + + if (pktlen < 5) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + if (!(pkt[0] & NGTCP2_HEADER_FORM_BIT)) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + ngtcp2_get_uint32(&version, &pkt[1]); + + if (version == 0) { + type = NGTCP2_PKT_VERSION_NEGOTIATION; + /* Version Negotiation is not a long header packet. */ + flags = NGTCP2_PKT_FLAG_NONE; + /* This must be Version Negotiation packet which lacks packet + number and payload length fields. */ + len = 5 + 2; + } else { + if (!(pkt[0] & NGTCP2_FIXED_BIT_MASK)) { + flags |= NGTCP2_PKT_FLAG_FIXED_BIT_CLEAR; + } + + type = ngtcp2_pkt_get_type_long(version, pkt[0]); + switch (type) { + case 0: + return NGTCP2_ERR_INVALID_ARGUMENT; + case NGTCP2_PKT_INITIAL: + len = 1 /* Token Length */ + NGTCP2_MIN_LONG_HEADERLEN - + 1; /* Cut packet number field */ + break; + case NGTCP2_PKT_RETRY: + /* Retry packet does not have packet number and length fields */ + len = 5 + 2; + break; + case NGTCP2_PKT_HANDSHAKE: + case NGTCP2_PKT_0RTT: + len = NGTCP2_MIN_LONG_HEADERLEN - 1; /* Cut packet number field */ + break; + default: + /* Unreachable */ + ngtcp2_unreachable(); + } + } + + if (pktlen < len) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + p = &pkt[5]; + dcil = *p; + if (dcil > NGTCP2_MAX_CIDLEN) { + /* QUIC v1 implementation never expect to receive CID length more + than NGTCP2_MAX_CIDLEN. */ + return NGTCP2_ERR_INVALID_ARGUMENT; + } + len += dcil; + + if (pktlen < len) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + p += 1 + dcil; + scil = *p; + if (scil > NGTCP2_MAX_CIDLEN) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + len += scil; + + if (pktlen < len) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + p += 1 + scil; + + if (type == NGTCP2_PKT_INITIAL) { + /* Token Length */ + ntokenlen = ngtcp2_get_uvarintlen(p); + len += ntokenlen - 1; + + if (pktlen < len) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + p = ngtcp2_get_uvarint(&vi, p); + if (pktlen - len < vi) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + tokenlen = (size_t)vi; + len += tokenlen; + + if (tokenlen) { + token = p; + } + + p += tokenlen; + } + + switch (type) { + case NGTCP2_PKT_RETRY: + break; + default: + if (!(flags & NGTCP2_PKT_FLAG_LONG_FORM)) { + assert(type == NGTCP2_PKT_VERSION_NEGOTIATION); + /* Version Negotiation is not a long header packet. */ + break; + } + + /* Length */ + n = ngtcp2_get_uvarintlen(p); + len += n - 1; + + if (pktlen < len) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + } + + dest->flags = flags; + dest->type = type; + dest->version = version; + dest->pkt_num = 0; + dest->pkt_numlen = 0; + + p = &pkt[6]; + ngtcp2_cid_init(&dest->dcid, p, dcil); + p += dcil + 1; + ngtcp2_cid_init(&dest->scid, p, scil); + p += scil; + + dest->token = token; + dest->tokenlen = tokenlen; + p += ntokenlen + tokenlen; + + switch (type) { + case NGTCP2_PKT_RETRY: + dest->len = 0; + break; + default: + if (!(flags & NGTCP2_PKT_FLAG_LONG_FORM)) { + assert(type == NGTCP2_PKT_VERSION_NEGOTIATION); + /* Version Negotiation is not a long header packet. */ + dest->len = 0; + break; + } + + p = ngtcp2_get_uvarint(&vi, p); + if (vi > SIZE_MAX) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + dest->len = (size_t)vi; + } + + assert((size_t)(p - pkt) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize ngtcp2_pkt_decode_hd_short(ngtcp2_pkt_hd *dest, const uint8_t *pkt, + size_t pktlen, size_t dcidlen) { + size_t len = 1 + dcidlen; + const uint8_t *p = pkt; + uint8_t flags = NGTCP2_PKT_FLAG_NONE; + + assert(dcidlen <= NGTCP2_MAX_CIDLEN); + + if (pktlen < len) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + if (pkt[0] & NGTCP2_HEADER_FORM_BIT) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + if (!(pkt[0] & NGTCP2_FIXED_BIT_MASK)) { + flags |= NGTCP2_PKT_FLAG_FIXED_BIT_CLEAR; + } + + p = &pkt[1]; + + dest->type = NGTCP2_PKT_1RTT; + + ngtcp2_cid_init(&dest->dcid, p, dcidlen); + p += dcidlen; + + /* Set 0 to SCID so that we don't accidentally reference it and gets + garbage. */ + ngtcp2_cid_zero(&dest->scid); + + dest->flags = flags; + dest->version = 0; + dest->len = 0; + dest->pkt_num = 0; + dest->pkt_numlen = 0; + dest->token = NULL; + dest->tokenlen = 0; + + assert((size_t)(p - pkt) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize ngtcp2_pkt_encode_hd_long(uint8_t *out, size_t outlen, + const ngtcp2_pkt_hd *hd) { + uint8_t *p; + size_t len = NGTCP2_MIN_LONG_HEADERLEN + hd->dcid.datalen + hd->scid.datalen - + 2; /* NGTCP2_MIN_LONG_HEADERLEN includes 1 byte for + len and 1 byte for packet number. */ + + if (hd->type != NGTCP2_PKT_RETRY) { + len += NGTCP2_PKT_LENGTHLEN /* Length */ + hd->pkt_numlen; + } + + if (hd->type == NGTCP2_PKT_INITIAL) { + len += ngtcp2_put_uvarintlen(hd->tokenlen) + hd->tokenlen; + } + + if (outlen < len) { + return NGTCP2_ERR_NOBUF; + } + + p = out; + + *p = (uint8_t)(NGTCP2_HEADER_FORM_BIT | + (ngtcp2_pkt_versioned_type(hd->version, hd->type) << 4) | + (uint8_t)(hd->pkt_numlen - 1)); + if (!(hd->flags & NGTCP2_PKT_FLAG_FIXED_BIT_CLEAR)) { + *p |= NGTCP2_FIXED_BIT_MASK; + } + + ++p; + + p = ngtcp2_put_uint32be(p, hd->version); + *p++ = (uint8_t)hd->dcid.datalen; + if (hd->dcid.datalen) { + p = ngtcp2_cpymem(p, hd->dcid.data, hd->dcid.datalen); + } + *p++ = (uint8_t)hd->scid.datalen; + if (hd->scid.datalen) { + p = ngtcp2_cpymem(p, hd->scid.data, hd->scid.datalen); + } + + if (hd->type == NGTCP2_PKT_INITIAL) { + p = ngtcp2_put_uvarint(p, hd->tokenlen); + if (hd->tokenlen) { + p = ngtcp2_cpymem(p, hd->token, hd->tokenlen); + } + } + + if (hd->type != NGTCP2_PKT_RETRY) { + p = ngtcp2_put_uvarint30(p, (uint32_t)hd->len); + p = ngtcp2_put_pkt_num(p, hd->pkt_num, hd->pkt_numlen); + } + + assert((size_t)(p - out) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize ngtcp2_pkt_encode_hd_short(uint8_t *out, size_t outlen, + const ngtcp2_pkt_hd *hd) { + uint8_t *p; + size_t len = 1 + hd->dcid.datalen + hd->pkt_numlen; + + if (outlen < len) { + return NGTCP2_ERR_NOBUF; + } + + p = out; + + *p = (uint8_t)(hd->pkt_numlen - 1); + if (!(hd->flags & NGTCP2_PKT_FLAG_FIXED_BIT_CLEAR)) { + *p |= NGTCP2_FIXED_BIT_MASK; + } + if (hd->flags & NGTCP2_PKT_FLAG_KEY_PHASE) { + *p |= NGTCP2_SHORT_KEY_PHASE_BIT; + } + + ++p; + + if (hd->dcid.datalen) { + p = ngtcp2_cpymem(p, hd->dcid.data, hd->dcid.datalen); + } + + p = ngtcp2_put_pkt_num(p, hd->pkt_num, hd->pkt_numlen); + + assert((size_t)(p - out) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize ngtcp2_pkt_decode_frame(ngtcp2_frame *dest, const uint8_t *payload, + size_t payloadlen) { + uint8_t type; + + if (payloadlen == 0) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + type = payload[0]; + + switch (type) { + case NGTCP2_FRAME_PADDING: + return ngtcp2_pkt_decode_padding_frame(&dest->padding, payload, payloadlen); + case NGTCP2_FRAME_RESET_STREAM: + return ngtcp2_pkt_decode_reset_stream_frame(&dest->reset_stream, payload, + payloadlen); + case NGTCP2_FRAME_CONNECTION_CLOSE: + case NGTCP2_FRAME_CONNECTION_CLOSE_APP: + return ngtcp2_pkt_decode_connection_close_frame(&dest->connection_close, + payload, payloadlen); + case NGTCP2_FRAME_MAX_DATA: + return ngtcp2_pkt_decode_max_data_frame(&dest->max_data, payload, + payloadlen); + case NGTCP2_FRAME_MAX_STREAM_DATA: + return ngtcp2_pkt_decode_max_stream_data_frame(&dest->max_stream_data, + payload, payloadlen); + case NGTCP2_FRAME_MAX_STREAMS_BIDI: + case NGTCP2_FRAME_MAX_STREAMS_UNI: + return ngtcp2_pkt_decode_max_streams_frame(&dest->max_streams, payload, + payloadlen); + case NGTCP2_FRAME_PING: + return ngtcp2_pkt_decode_ping_frame(&dest->ping, payload, payloadlen); + case NGTCP2_FRAME_DATA_BLOCKED: + return ngtcp2_pkt_decode_data_blocked_frame(&dest->data_blocked, payload, + payloadlen); + case NGTCP2_FRAME_STREAM_DATA_BLOCKED: + return ngtcp2_pkt_decode_stream_data_blocked_frame( + &dest->stream_data_blocked, payload, payloadlen); + case NGTCP2_FRAME_STREAMS_BLOCKED_BIDI: + case NGTCP2_FRAME_STREAMS_BLOCKED_UNI: + return ngtcp2_pkt_decode_streams_blocked_frame(&dest->streams_blocked, + payload, payloadlen); + case NGTCP2_FRAME_NEW_CONNECTION_ID: + return ngtcp2_pkt_decode_new_connection_id_frame(&dest->new_connection_id, + payload, payloadlen); + case NGTCP2_FRAME_STOP_SENDING: + return ngtcp2_pkt_decode_stop_sending_frame(&dest->stop_sending, payload, + payloadlen); + case NGTCP2_FRAME_ACK: + case NGTCP2_FRAME_ACK_ECN: + return ngtcp2_pkt_decode_ack_frame(&dest->ack, payload, payloadlen); + case NGTCP2_FRAME_PATH_CHALLENGE: + return ngtcp2_pkt_decode_path_challenge_frame(&dest->path_challenge, + payload, payloadlen); + case NGTCP2_FRAME_PATH_RESPONSE: + return ngtcp2_pkt_decode_path_response_frame(&dest->path_response, payload, + payloadlen); + case NGTCP2_FRAME_CRYPTO: + return ngtcp2_pkt_decode_crypto_frame(&dest->crypto, payload, payloadlen); + case NGTCP2_FRAME_NEW_TOKEN: + return ngtcp2_pkt_decode_new_token_frame(&dest->new_token, payload, + payloadlen); + case NGTCP2_FRAME_RETIRE_CONNECTION_ID: + return ngtcp2_pkt_decode_retire_connection_id_frame( + &dest->retire_connection_id, payload, payloadlen); + case NGTCP2_FRAME_HANDSHAKE_DONE: + return ngtcp2_pkt_decode_handshake_done_frame(&dest->handshake_done, + payload, payloadlen); + case NGTCP2_FRAME_DATAGRAM: + case NGTCP2_FRAME_DATAGRAM_LEN: + return ngtcp2_pkt_decode_datagram_frame(&dest->datagram, payload, + payloadlen); + default: + if (has_mask(type, NGTCP2_FRAME_STREAM)) { + return ngtcp2_pkt_decode_stream_frame(&dest->stream, payload, payloadlen); + } + return NGTCP2_ERR_FRAME_ENCODING; + } +} + +ngtcp2_ssize ngtcp2_pkt_decode_stream_frame(ngtcp2_stream *dest, + const uint8_t *payload, + size_t payloadlen) { + uint8_t type; + size_t len = 1 + 1; + const uint8_t *p; + size_t datalen; + size_t ndatalen = 0; + size_t n; + uint64_t vi; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + type = payload[0]; + + p = payload + 1; + + n = ngtcp2_get_uvarintlen(p); + len += n - 1; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p += n; + + if (type & NGTCP2_STREAM_OFF_BIT) { + ++len; + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + n = ngtcp2_get_uvarintlen(p); + len += n - 1; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p += n; + } + + if (type & NGTCP2_STREAM_LEN_BIT) { + ++len; + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + ndatalen = ngtcp2_get_uvarintlen(p); + len += ndatalen - 1; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + /* p = */ ngtcp2_get_uvarint(&vi, p); + if (payloadlen - len < vi) { + return NGTCP2_ERR_FRAME_ENCODING; + } + datalen = (size_t)vi; + len += datalen; + } else { + len = payloadlen; + } + + p = payload + 1; + + dest->type = NGTCP2_FRAME_STREAM; + dest->flags = (uint8_t)(type & ~NGTCP2_FRAME_STREAM); + dest->fin = (type & NGTCP2_STREAM_FIN_BIT) != 0; + p = ngtcp2_get_varint(&dest->stream_id, p); + + if (type & NGTCP2_STREAM_OFF_BIT) { + p = ngtcp2_get_uvarint(&dest->offset, p); + } else { + dest->offset = 0; + } + + if (type & NGTCP2_STREAM_LEN_BIT) { + p += ndatalen; + } else { + datalen = payloadlen - (size_t)(p - payload); + } + + if (datalen) { + dest->data[0].len = datalen; + dest->data[0].base = (uint8_t *)p; + dest->datacnt = 1; + p += datalen; + } else { + dest->datacnt = 0; + } + + assert((size_t)(p - payload) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize ngtcp2_pkt_decode_ack_frame(ngtcp2_ack *dest, + const uint8_t *payload, + size_t payloadlen) { + size_t rangecnt, max_rangecnt; + size_t nrangecnt; + size_t len = 1 + 1 + 1 + 1 + 1; + const uint8_t *p; + size_t i, j; + ngtcp2_ack_range *range; + size_t n; + uint8_t type; + uint64_t vi; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + type = payload[0]; + + p = payload + 1; + + /* Largest Acknowledged */ + n = ngtcp2_get_uvarintlen(p); + len += n - 1; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p += n; + + /* ACK Delay */ + n = ngtcp2_get_uvarintlen(p); + len += n - 1; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p += n; + + /* ACK Range Count */ + nrangecnt = ngtcp2_get_uvarintlen(p); + len += nrangecnt - 1; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p = ngtcp2_get_uvarint(&vi, p); + if (vi > SIZE_MAX / (1 + 1) || payloadlen - len < vi * (1 + 1)) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + rangecnt = (size_t)vi; + len += rangecnt * (1 + 1); + + /* First ACK Range */ + n = ngtcp2_get_uvarintlen(p); + len += n - 1; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p += n; + + for (i = 0; i < rangecnt; ++i) { + /* Gap, and Additional ACK Range */ + for (j = 0; j < 2; ++j) { + n = ngtcp2_get_uvarintlen(p); + len += n - 1; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p += n; + } + } + + if (type == NGTCP2_FRAME_ACK_ECN) { + len += 3; + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + for (i = 0; i < 3; ++i) { + n = ngtcp2_get_uvarintlen(p); + len += n - 1; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p += n; + } + } + + /* TODO We might not decode all ranges. It could be very large. */ + max_rangecnt = ngtcp2_min(NGTCP2_MAX_ACK_RANGES, rangecnt); + + p = payload + 1; + + dest->type = type; + p = ngtcp2_get_varint(&dest->largest_ack, p); + p = ngtcp2_get_uvarint(&dest->ack_delay, p); + /* This value will be assigned in the upper layer. */ + dest->ack_delay_unscaled = 0; + dest->rangecnt = max_rangecnt; + p += nrangecnt; + p = ngtcp2_get_uvarint(&dest->first_ack_range, p); + + for (i = 0; i < max_rangecnt; ++i) { + range = &dest->ranges[i]; + p = ngtcp2_get_uvarint(&range->gap, p); + p = ngtcp2_get_uvarint(&range->len, p); + } + for (i = max_rangecnt; i < rangecnt; ++i) { + p += ngtcp2_get_uvarintlen(p); + p += ngtcp2_get_uvarintlen(p); + } + + if (type == NGTCP2_FRAME_ACK_ECN) { + p = ngtcp2_get_uvarint(&dest->ecn.ect0, p); + p = ngtcp2_get_uvarint(&dest->ecn.ect1, p); + p = ngtcp2_get_uvarint(&dest->ecn.ce, p); + } + + assert((size_t)(p - payload) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize ngtcp2_pkt_decode_padding_frame(ngtcp2_padding *dest, + const uint8_t *payload, + size_t payloadlen) { + const uint8_t *p, *ep; + + assert(payloadlen > 0); + + p = payload + 1; + ep = payload + payloadlen; + + for (; p != ep && *p == NGTCP2_FRAME_PADDING; ++p) + ; + + dest->type = NGTCP2_FRAME_PADDING; + dest->len = (size_t)(p - payload); + + return (ngtcp2_ssize)dest->len; +} + +ngtcp2_ssize ngtcp2_pkt_decode_reset_stream_frame(ngtcp2_reset_stream *dest, + const uint8_t *payload, + size_t payloadlen) { + size_t len = 1 + 1 + 1 + 1; + const uint8_t *p; + size_t n; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p = payload + 1; + + n = ngtcp2_get_uvarintlen(p); + len += n - 1; + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + p += n; + n = ngtcp2_get_uvarintlen(p); + len += n - 1; + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + p += n; + n = ngtcp2_get_uvarintlen(p); + len += n - 1; + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p = payload + 1; + + dest->type = NGTCP2_FRAME_RESET_STREAM; + p = ngtcp2_get_varint(&dest->stream_id, p); + p = ngtcp2_get_uvarint(&dest->app_error_code, p); + p = ngtcp2_get_uvarint(&dest->final_size, p); + + assert((size_t)(p - payload) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize ngtcp2_pkt_decode_connection_close_frame( + ngtcp2_connection_close *dest, const uint8_t *payload, size_t payloadlen) { + size_t len = 1 + 1 + 1; + const uint8_t *p; + size_t reasonlen; + size_t nreasonlen; + size_t n; + uint8_t type; + uint64_t vi; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + type = payload[0]; + + p = payload + 1; + + n = ngtcp2_get_uvarintlen(p); + len += n - 1; + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p += n; + + if (type == NGTCP2_FRAME_CONNECTION_CLOSE) { + ++len; + + n = ngtcp2_get_uvarintlen(p); + len += n - 1; + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p += n; + } + + nreasonlen = ngtcp2_get_uvarintlen(p); + len += nreasonlen - 1; + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p = ngtcp2_get_uvarint(&vi, p); + if (payloadlen - len < vi) { + return NGTCP2_ERR_FRAME_ENCODING; + } + reasonlen = (size_t)vi; + len += reasonlen; + + p = payload + 1; + + dest->type = type; + p = ngtcp2_get_uvarint(&dest->error_code, p); + if (type == NGTCP2_FRAME_CONNECTION_CLOSE) { + p = ngtcp2_get_uvarint(&dest->frame_type, p); + } else { + dest->frame_type = 0; + } + dest->reasonlen = reasonlen; + p += nreasonlen; + if (reasonlen == 0) { + dest->reason = NULL; + } else { + dest->reason = (uint8_t *)p; + p += reasonlen; + } + + assert((size_t)(p - payload) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize ngtcp2_pkt_decode_max_data_frame(ngtcp2_max_data *dest, + const uint8_t *payload, + size_t payloadlen) { + size_t len = 1 + 1; + const uint8_t *p; + size_t n; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p = payload + 1; + + n = ngtcp2_get_uvarintlen(p); + len += n - 1; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + dest->type = NGTCP2_FRAME_MAX_DATA; + p = ngtcp2_get_uvarint(&dest->max_data, p); + + assert((size_t)(p - payload) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize ngtcp2_pkt_decode_max_stream_data_frame( + ngtcp2_max_stream_data *dest, const uint8_t *payload, size_t payloadlen) { + size_t len = 1 + 1 + 1; + const uint8_t *p; + size_t n; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p = payload + 1; + + n = ngtcp2_get_uvarintlen(p); + len += n - 1; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p += n; + + n = ngtcp2_get_uvarintlen(p); + len += n - 1; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p = payload + 1; + + dest->type = NGTCP2_FRAME_MAX_STREAM_DATA; + p = ngtcp2_get_varint(&dest->stream_id, p); + p = ngtcp2_get_uvarint(&dest->max_stream_data, p); + + assert((size_t)(p - payload) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize ngtcp2_pkt_decode_max_streams_frame(ngtcp2_max_streams *dest, + const uint8_t *payload, + size_t payloadlen) { + size_t len = 1 + 1; + const uint8_t *p; + size_t n; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p = payload + 1; + + n = ngtcp2_get_uvarintlen(p); + len += n - 1; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + dest->type = payload[0]; + p = ngtcp2_get_uvarint(&dest->max_streams, p); + + assert((size_t)(p - payload) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize ngtcp2_pkt_decode_ping_frame(ngtcp2_ping *dest, + const uint8_t *payload, + size_t payloadlen) { + (void)payload; + (void)payloadlen; + + dest->type = NGTCP2_FRAME_PING; + return 1; +} + +ngtcp2_ssize ngtcp2_pkt_decode_data_blocked_frame(ngtcp2_data_blocked *dest, + const uint8_t *payload, + size_t payloadlen) { + size_t len = 1 + 1; + const uint8_t *p; + size_t n; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p = payload + 1; + + n = ngtcp2_get_uvarintlen(p); + len += n - 1; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + dest->type = NGTCP2_FRAME_DATA_BLOCKED; + p = ngtcp2_get_uvarint(&dest->offset, p); + + assert((size_t)(p - payload) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize +ngtcp2_pkt_decode_stream_data_blocked_frame(ngtcp2_stream_data_blocked *dest, + const uint8_t *payload, + size_t payloadlen) { + size_t len = 1 + 1 + 1; + const uint8_t *p; + size_t n; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p = payload + 1; + + n = ngtcp2_get_uvarintlen(p); + len += n - 1; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p += n; + + n = ngtcp2_get_uvarintlen(p); + len += n - 1; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p = payload + 1; + + dest->type = NGTCP2_FRAME_STREAM_DATA_BLOCKED; + p = ngtcp2_get_varint(&dest->stream_id, p); + p = ngtcp2_get_uvarint(&dest->offset, p); + + assert((size_t)(p - payload) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize ngtcp2_pkt_decode_streams_blocked_frame( + ngtcp2_streams_blocked *dest, const uint8_t *payload, size_t payloadlen) { + size_t len = 1 + 1; + const uint8_t *p; + size_t n; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p = payload + 1; + + n = ngtcp2_get_uvarintlen(p); + len += n - 1; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + dest->type = payload[0]; + p = ngtcp2_get_uvarint(&dest->max_streams, p); + + assert((size_t)(p - payload) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize ngtcp2_pkt_decode_new_connection_id_frame( + ngtcp2_new_connection_id *dest, const uint8_t *payload, size_t payloadlen) { + size_t len = 1 + 1 + 1 + 1 + 16; + const uint8_t *p; + size_t n; + size_t cil; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p = payload + 1; + + n = ngtcp2_get_uvarintlen(p); + len += n - 1; + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p += n; + + n = ngtcp2_get_uvarintlen(p); + len += n - 1; + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p += n; + + cil = *p; + if (cil < NGTCP2_MIN_CIDLEN || cil > NGTCP2_MAX_CIDLEN) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + len += cil; + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p = payload + 1; + + dest->type = NGTCP2_FRAME_NEW_CONNECTION_ID; + p = ngtcp2_get_uvarint(&dest->seq, p); + p = ngtcp2_get_uvarint(&dest->retire_prior_to, p); + ++p; + ngtcp2_cid_init(&dest->cid, p, cil); + p += cil; + p = ngtcp2_get_bytes(dest->stateless_reset_token, p, + NGTCP2_STATELESS_RESET_TOKENLEN); + + assert((size_t)(p - payload) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize ngtcp2_pkt_decode_stop_sending_frame(ngtcp2_stop_sending *dest, + const uint8_t *payload, + size_t payloadlen) { + size_t len = 1 + 1 + 1; + const uint8_t *p; + size_t n; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p = payload + 1; + + n = ngtcp2_get_uvarintlen(p); + len += n - 1; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + p += n; + n = ngtcp2_get_uvarintlen(p); + len += n - 1; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p = payload + 1; + + dest->type = NGTCP2_FRAME_STOP_SENDING; + p = ngtcp2_get_varint(&dest->stream_id, p); + p = ngtcp2_get_uvarint(&dest->app_error_code, p); + + assert((size_t)(p - payload) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize ngtcp2_pkt_decode_path_challenge_frame(ngtcp2_path_challenge *dest, + const uint8_t *payload, + size_t payloadlen) { + size_t len = 1 + 8; + const uint8_t *p; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p = payload + 1; + + dest->type = NGTCP2_FRAME_PATH_CHALLENGE; + ngtcp2_cpymem(dest->data, p, sizeof(dest->data)); + p += sizeof(dest->data); + + assert((size_t)(p - payload) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize ngtcp2_pkt_decode_path_response_frame(ngtcp2_path_response *dest, + const uint8_t *payload, + size_t payloadlen) { + size_t len = 1 + 8; + const uint8_t *p; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p = payload + 1; + + dest->type = NGTCP2_FRAME_PATH_RESPONSE; + ngtcp2_cpymem(dest->data, p, sizeof(dest->data)); + p += sizeof(dest->data); + + assert((size_t)(p - payload) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize ngtcp2_pkt_decode_crypto_frame(ngtcp2_crypto *dest, + const uint8_t *payload, + size_t payloadlen) { + size_t len = 1 + 1 + 1; + const uint8_t *p; + size_t datalen; + size_t ndatalen; + size_t n; + uint64_t vi; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p = payload + 1; + + n = ngtcp2_get_uvarintlen(p); + len += n - 1; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p += n; + + ndatalen = ngtcp2_get_uvarintlen(p); + len += ndatalen - 1; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p = ngtcp2_get_uvarint(&vi, p); + if (payloadlen - len < vi) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + datalen = (size_t)vi; + len += datalen; + + p = payload + 1; + + dest->type = NGTCP2_FRAME_CRYPTO; + p = ngtcp2_get_uvarint(&dest->offset, p); + dest->data[0].len = datalen; + p += ndatalen; + if (dest->data[0].len) { + dest->data[0].base = (uint8_t *)p; + p += dest->data[0].len; + dest->datacnt = 1; + } else { + dest->data[0].base = NULL; + dest->datacnt = 0; + } + + assert((size_t)(p - payload) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize ngtcp2_pkt_decode_new_token_frame(ngtcp2_new_token *dest, + const uint8_t *payload, + size_t payloadlen) { + size_t len = 1 + 1; + const uint8_t *p; + size_t n; + size_t datalen; + uint64_t vi; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p = payload + 1; + + n = ngtcp2_get_uvarintlen(p); + len += n - 1; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p = ngtcp2_get_uvarint(&vi, p); + if (payloadlen - len < vi) { + return NGTCP2_ERR_FRAME_ENCODING; + } + datalen = (size_t)vi; + len += datalen; + + dest->type = NGTCP2_FRAME_NEW_TOKEN; + dest->tokenlen = datalen; + dest->token = (uint8_t *)p; + p += dest->tokenlen; + + assert((size_t)(p - payload) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize +ngtcp2_pkt_decode_retire_connection_id_frame(ngtcp2_retire_connection_id *dest, + const uint8_t *payload, + size_t payloadlen) { + size_t len = 1 + 1; + const uint8_t *p; + size_t n; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p = payload + 1; + + n = ngtcp2_get_uvarintlen(p); + len += n - 1; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + dest->type = NGTCP2_FRAME_RETIRE_CONNECTION_ID; + p = ngtcp2_get_uvarint(&dest->seq, p); + + assert((size_t)(p - payload) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize ngtcp2_pkt_decode_handshake_done_frame(ngtcp2_handshake_done *dest, + const uint8_t *payload, + size_t payloadlen) { + (void)payload; + (void)payloadlen; + + dest->type = NGTCP2_FRAME_HANDSHAKE_DONE; + return 1; +} + +ngtcp2_ssize ngtcp2_pkt_decode_datagram_frame(ngtcp2_datagram *dest, + const uint8_t *payload, + size_t payloadlen) { + size_t len = 1; + const uint8_t *p; + uint8_t type; + size_t datalen; + size_t n; + uint64_t vi; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + type = payload[0]; + + p = payload + 1; + + switch (type) { + case NGTCP2_FRAME_DATAGRAM: + datalen = payloadlen - 1; + len = payloadlen; + break; + case NGTCP2_FRAME_DATAGRAM_LEN: + ++len; + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + n = ngtcp2_get_uvarintlen(p); + len += n - 1; + + if (payloadlen < len) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + p = ngtcp2_get_uvarint(&vi, p); + if (payloadlen - len < vi) { + return NGTCP2_ERR_FRAME_ENCODING; + } + + datalen = (size_t)vi; + len += datalen; + break; + default: + ngtcp2_unreachable(); + } + + dest->type = type; + + if (datalen == 0) { + dest->datacnt = 0; + dest->data = NULL; + } else { + dest->datacnt = 1; + dest->data = dest->rdata; + dest->rdata[0].len = datalen; + + dest->rdata[0].base = (uint8_t *)p; + p += datalen; + } + + assert((size_t)(p - payload) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize ngtcp2_pkt_encode_frame(uint8_t *out, size_t outlen, + ngtcp2_frame *fr) { + switch (fr->type) { + case NGTCP2_FRAME_STREAM: + return ngtcp2_pkt_encode_stream_frame(out, outlen, &fr->stream); + case NGTCP2_FRAME_ACK: + case NGTCP2_FRAME_ACK_ECN: + return ngtcp2_pkt_encode_ack_frame(out, outlen, &fr->ack); + case NGTCP2_FRAME_PADDING: + return ngtcp2_pkt_encode_padding_frame(out, outlen, &fr->padding); + case NGTCP2_FRAME_RESET_STREAM: + return ngtcp2_pkt_encode_reset_stream_frame(out, outlen, &fr->reset_stream); + case NGTCP2_FRAME_CONNECTION_CLOSE: + case NGTCP2_FRAME_CONNECTION_CLOSE_APP: + return ngtcp2_pkt_encode_connection_close_frame(out, outlen, + &fr->connection_close); + case NGTCP2_FRAME_MAX_DATA: + return ngtcp2_pkt_encode_max_data_frame(out, outlen, &fr->max_data); + case NGTCP2_FRAME_MAX_STREAM_DATA: + return ngtcp2_pkt_encode_max_stream_data_frame(out, outlen, + &fr->max_stream_data); + case NGTCP2_FRAME_MAX_STREAMS_BIDI: + case NGTCP2_FRAME_MAX_STREAMS_UNI: + return ngtcp2_pkt_encode_max_streams_frame(out, outlen, &fr->max_streams); + case NGTCP2_FRAME_PING: + return ngtcp2_pkt_encode_ping_frame(out, outlen, &fr->ping); + case NGTCP2_FRAME_DATA_BLOCKED: + return ngtcp2_pkt_encode_data_blocked_frame(out, outlen, &fr->data_blocked); + case NGTCP2_FRAME_STREAM_DATA_BLOCKED: + return ngtcp2_pkt_encode_stream_data_blocked_frame( + out, outlen, &fr->stream_data_blocked); + case NGTCP2_FRAME_STREAMS_BLOCKED_BIDI: + case NGTCP2_FRAME_STREAMS_BLOCKED_UNI: + return ngtcp2_pkt_encode_streams_blocked_frame(out, outlen, + &fr->streams_blocked); + case NGTCP2_FRAME_NEW_CONNECTION_ID: + return ngtcp2_pkt_encode_new_connection_id_frame(out, outlen, + &fr->new_connection_id); + case NGTCP2_FRAME_STOP_SENDING: + return ngtcp2_pkt_encode_stop_sending_frame(out, outlen, &fr->stop_sending); + case NGTCP2_FRAME_PATH_CHALLENGE: + return ngtcp2_pkt_encode_path_challenge_frame(out, outlen, + &fr->path_challenge); + case NGTCP2_FRAME_PATH_RESPONSE: + return ngtcp2_pkt_encode_path_response_frame(out, outlen, + &fr->path_response); + case NGTCP2_FRAME_CRYPTO: + return ngtcp2_pkt_encode_crypto_frame(out, outlen, &fr->crypto); + case NGTCP2_FRAME_NEW_TOKEN: + return ngtcp2_pkt_encode_new_token_frame(out, outlen, &fr->new_token); + case NGTCP2_FRAME_RETIRE_CONNECTION_ID: + return ngtcp2_pkt_encode_retire_connection_id_frame( + out, outlen, &fr->retire_connection_id); + case NGTCP2_FRAME_HANDSHAKE_DONE: + return ngtcp2_pkt_encode_handshake_done_frame(out, outlen, + &fr->handshake_done); + case NGTCP2_FRAME_DATAGRAM: + case NGTCP2_FRAME_DATAGRAM_LEN: + return ngtcp2_pkt_encode_datagram_frame(out, outlen, &fr->datagram); + default: + return NGTCP2_ERR_INVALID_ARGUMENT; + } +} + +ngtcp2_ssize ngtcp2_pkt_encode_stream_frame(uint8_t *out, size_t outlen, + ngtcp2_stream *fr) { + size_t len = 1; + uint8_t flags = NGTCP2_STREAM_LEN_BIT; + uint8_t *p; + size_t i; + size_t datalen = 0; + + if (fr->fin) { + flags |= NGTCP2_STREAM_FIN_BIT; + } + + if (fr->offset) { + flags |= NGTCP2_STREAM_OFF_BIT; + len += ngtcp2_put_uvarintlen(fr->offset); + } + + len += ngtcp2_put_uvarintlen((uint64_t)fr->stream_id); + + for (i = 0; i < fr->datacnt; ++i) { + datalen += fr->data[i].len; + } + + len += ngtcp2_put_uvarintlen(datalen); + len += datalen; + + if (outlen < len) { + return NGTCP2_ERR_NOBUF; + } + + p = out; + + *p++ = flags | NGTCP2_FRAME_STREAM; + + fr->flags = flags; + + p = ngtcp2_put_uvarint(p, (uint64_t)fr->stream_id); + + if (fr->offset) { + p = ngtcp2_put_uvarint(p, fr->offset); + } + + p = ngtcp2_put_uvarint(p, datalen); + + for (i = 0; i < fr->datacnt; ++i) { + assert(fr->data[i].len); + assert(fr->data[i].base); + p = ngtcp2_cpymem(p, fr->data[i].base, fr->data[i].len); + } + + assert((size_t)(p - out) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize ngtcp2_pkt_encode_ack_frame(uint8_t *out, size_t outlen, + ngtcp2_ack *fr) { + size_t len = 1 + ngtcp2_put_uvarintlen((uint64_t)fr->largest_ack) + + ngtcp2_put_uvarintlen(fr->ack_delay) + + ngtcp2_put_uvarintlen(fr->rangecnt) + + ngtcp2_put_uvarintlen(fr->first_ack_range); + uint8_t *p; + size_t i; + const ngtcp2_ack_range *range; + + for (i = 0; i < fr->rangecnt; ++i) { + range = &fr->ranges[i]; + len += ngtcp2_put_uvarintlen(range->gap); + len += ngtcp2_put_uvarintlen(range->len); + } + + if (fr->type == NGTCP2_FRAME_ACK_ECN) { + len += ngtcp2_put_uvarintlen(fr->ecn.ect0) + + ngtcp2_put_uvarintlen(fr->ecn.ect1) + + ngtcp2_put_uvarintlen(fr->ecn.ce); + } + + if (outlen < len) { + return NGTCP2_ERR_NOBUF; + } + + p = out; + + *p++ = fr->type; + p = ngtcp2_put_uvarint(p, (uint64_t)fr->largest_ack); + p = ngtcp2_put_uvarint(p, fr->ack_delay); + p = ngtcp2_put_uvarint(p, fr->rangecnt); + p = ngtcp2_put_uvarint(p, fr->first_ack_range); + + for (i = 0; i < fr->rangecnt; ++i) { + range = &fr->ranges[i]; + p = ngtcp2_put_uvarint(p, range->gap); + p = ngtcp2_put_uvarint(p, range->len); + } + + if (fr->type == NGTCP2_FRAME_ACK_ECN) { + p = ngtcp2_put_uvarint(p, fr->ecn.ect0); + p = ngtcp2_put_uvarint(p, fr->ecn.ect1); + p = ngtcp2_put_uvarint(p, fr->ecn.ce); + } + + assert((size_t)(p - out) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize ngtcp2_pkt_encode_padding_frame(uint8_t *out, size_t outlen, + const ngtcp2_padding *fr) { + if (outlen < fr->len) { + return NGTCP2_ERR_NOBUF; + } + + memset(out, 0, fr->len); + + return (ngtcp2_ssize)fr->len; +} + +ngtcp2_ssize +ngtcp2_pkt_encode_reset_stream_frame(uint8_t *out, size_t outlen, + const ngtcp2_reset_stream *fr) { + size_t len = 1 + ngtcp2_put_uvarintlen((uint64_t)fr->stream_id) + + ngtcp2_put_uvarintlen(fr->app_error_code) + + ngtcp2_put_uvarintlen(fr->final_size); + uint8_t *p; + + if (outlen < len) { + return NGTCP2_ERR_NOBUF; + } + + p = out; + + *p++ = NGTCP2_FRAME_RESET_STREAM; + p = ngtcp2_put_uvarint(p, (uint64_t)fr->stream_id); + p = ngtcp2_put_uvarint(p, fr->app_error_code); + p = ngtcp2_put_uvarint(p, fr->final_size); + + assert((size_t)(p - out) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize +ngtcp2_pkt_encode_connection_close_frame(uint8_t *out, size_t outlen, + const ngtcp2_connection_close *fr) { + size_t len = 1 + ngtcp2_put_uvarintlen(fr->error_code) + + (fr->type == NGTCP2_FRAME_CONNECTION_CLOSE + ? ngtcp2_put_uvarintlen(fr->frame_type) + : 0) + + ngtcp2_put_uvarintlen(fr->reasonlen) + fr->reasonlen; + uint8_t *p; + + if (outlen < len) { + return NGTCP2_ERR_NOBUF; + } + + p = out; + + *p++ = fr->type; + p = ngtcp2_put_uvarint(p, fr->error_code); + if (fr->type == NGTCP2_FRAME_CONNECTION_CLOSE) { + p = ngtcp2_put_uvarint(p, fr->frame_type); + } + p = ngtcp2_put_uvarint(p, fr->reasonlen); + if (fr->reasonlen) { + p = ngtcp2_cpymem(p, fr->reason, fr->reasonlen); + } + + assert((size_t)(p - out) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize ngtcp2_pkt_encode_max_data_frame(uint8_t *out, size_t outlen, + const ngtcp2_max_data *fr) { + size_t len = 1 + ngtcp2_put_uvarintlen(fr->max_data); + uint8_t *p; + + if (outlen < len) { + return NGTCP2_ERR_NOBUF; + } + + p = out; + + *p++ = NGTCP2_FRAME_MAX_DATA; + p = ngtcp2_put_uvarint(p, fr->max_data); + + assert((size_t)(p - out) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize +ngtcp2_pkt_encode_max_stream_data_frame(uint8_t *out, size_t outlen, + const ngtcp2_max_stream_data *fr) { + size_t len = 1 + ngtcp2_put_uvarintlen((uint64_t)fr->stream_id) + + ngtcp2_put_uvarintlen(fr->max_stream_data); + uint8_t *p; + + if (outlen < len) { + return NGTCP2_ERR_NOBUF; + } + + p = out; + + *p++ = NGTCP2_FRAME_MAX_STREAM_DATA; + p = ngtcp2_put_uvarint(p, (uint64_t)fr->stream_id); + p = ngtcp2_put_uvarint(p, fr->max_stream_data); + + assert((size_t)(p - out) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize ngtcp2_pkt_encode_max_streams_frame(uint8_t *out, size_t outlen, + const ngtcp2_max_streams *fr) { + size_t len = 1 + ngtcp2_put_uvarintlen(fr->max_streams); + uint8_t *p; + + if (outlen < len) { + return NGTCP2_ERR_NOBUF; + } + + p = out; + + *p++ = fr->type; + p = ngtcp2_put_uvarint(p, fr->max_streams); + + assert((size_t)(p - out) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize ngtcp2_pkt_encode_ping_frame(uint8_t *out, size_t outlen, + const ngtcp2_ping *fr) { + (void)fr; + + if (outlen < 1) { + return NGTCP2_ERR_NOBUF; + } + + *out++ = NGTCP2_FRAME_PING; + + return 1; +} + +ngtcp2_ssize +ngtcp2_pkt_encode_data_blocked_frame(uint8_t *out, size_t outlen, + const ngtcp2_data_blocked *fr) { + size_t len = 1 + ngtcp2_put_uvarintlen(fr->offset); + uint8_t *p; + + if (outlen < len) { + return NGTCP2_ERR_NOBUF; + } + + p = out; + + *p++ = NGTCP2_FRAME_DATA_BLOCKED; + p = ngtcp2_put_uvarint(p, fr->offset); + + assert((size_t)(p - out) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize ngtcp2_pkt_encode_stream_data_blocked_frame( + uint8_t *out, size_t outlen, const ngtcp2_stream_data_blocked *fr) { + size_t len = 1 + ngtcp2_put_uvarintlen((uint64_t)fr->stream_id) + + ngtcp2_put_uvarintlen(fr->offset); + uint8_t *p; + + if (outlen < len) { + return NGTCP2_ERR_NOBUF; + } + + p = out; + + *p++ = NGTCP2_FRAME_STREAM_DATA_BLOCKED; + p = ngtcp2_put_uvarint(p, (uint64_t)fr->stream_id); + p = ngtcp2_put_uvarint(p, fr->offset); + + assert((size_t)(p - out) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize +ngtcp2_pkt_encode_streams_blocked_frame(uint8_t *out, size_t outlen, + const ngtcp2_streams_blocked *fr) { + size_t len = 1 + ngtcp2_put_uvarintlen(fr->max_streams); + uint8_t *p; + + if (outlen < len) { + return NGTCP2_ERR_NOBUF; + } + + p = out; + + *p++ = fr->type; + p = ngtcp2_put_uvarint(p, fr->max_streams); + + assert((size_t)(p - out) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize +ngtcp2_pkt_encode_new_connection_id_frame(uint8_t *out, size_t outlen, + const ngtcp2_new_connection_id *fr) { + size_t len = 1 + ngtcp2_put_uvarintlen(fr->seq) + + ngtcp2_put_uvarintlen(fr->retire_prior_to) + 1 + + fr->cid.datalen + NGTCP2_STATELESS_RESET_TOKENLEN; + uint8_t *p; + + if (outlen < len) { + return NGTCP2_ERR_NOBUF; + } + + p = out; + + *p++ = NGTCP2_FRAME_NEW_CONNECTION_ID; + p = ngtcp2_put_uvarint(p, fr->seq); + p = ngtcp2_put_uvarint(p, fr->retire_prior_to); + *p++ = (uint8_t)fr->cid.datalen; + p = ngtcp2_cpymem(p, fr->cid.data, fr->cid.datalen); + p = ngtcp2_cpymem(p, fr->stateless_reset_token, + NGTCP2_STATELESS_RESET_TOKENLEN); + + assert((size_t)(p - out) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize +ngtcp2_pkt_encode_stop_sending_frame(uint8_t *out, size_t outlen, + const ngtcp2_stop_sending *fr) { + size_t len = 1 + ngtcp2_put_uvarintlen((uint64_t)fr->stream_id) + + ngtcp2_put_uvarintlen(fr->app_error_code); + uint8_t *p; + + if (outlen < len) { + return NGTCP2_ERR_NOBUF; + } + + p = out; + + *p++ = NGTCP2_FRAME_STOP_SENDING; + p = ngtcp2_put_uvarint(p, (uint64_t)fr->stream_id); + p = ngtcp2_put_uvarint(p, fr->app_error_code); + + assert((size_t)(p - out) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize +ngtcp2_pkt_encode_path_challenge_frame(uint8_t *out, size_t outlen, + const ngtcp2_path_challenge *fr) { + size_t len = 1 + 8; + uint8_t *p; + + if (outlen < len) { + return NGTCP2_ERR_NOBUF; + } + + p = out; + + *p++ = NGTCP2_FRAME_PATH_CHALLENGE; + p = ngtcp2_cpymem(p, fr->data, sizeof(fr->data)); + + assert((size_t)(p - out) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize +ngtcp2_pkt_encode_path_response_frame(uint8_t *out, size_t outlen, + const ngtcp2_path_response *fr) { + size_t len = 1 + 8; + uint8_t *p; + + if (outlen < len) { + return NGTCP2_ERR_NOBUF; + } + + p = out; + + *p++ = NGTCP2_FRAME_PATH_RESPONSE; + p = ngtcp2_cpymem(p, fr->data, sizeof(fr->data)); + + assert((size_t)(p - out) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize ngtcp2_pkt_encode_crypto_frame(uint8_t *out, size_t outlen, + const ngtcp2_crypto *fr) { + size_t len = 1; + uint8_t *p; + size_t i; + size_t datalen = 0; + + len += ngtcp2_put_uvarintlen(fr->offset); + + for (i = 0; i < fr->datacnt; ++i) { + datalen += fr->data[i].len; + } + + len += ngtcp2_put_uvarintlen(datalen); + len += datalen; + + if (outlen < len) { + return NGTCP2_ERR_NOBUF; + } + + p = out; + + *p++ = NGTCP2_FRAME_CRYPTO; + + p = ngtcp2_put_uvarint(p, fr->offset); + p = ngtcp2_put_uvarint(p, datalen); + + for (i = 0; i < fr->datacnt; ++i) { + assert(fr->data[i].base); + p = ngtcp2_cpymem(p, fr->data[i].base, fr->data[i].len); + } + + assert((size_t)(p - out) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize ngtcp2_pkt_encode_new_token_frame(uint8_t *out, size_t outlen, + const ngtcp2_new_token *fr) { + size_t len = 1 + ngtcp2_put_uvarintlen(fr->tokenlen) + fr->tokenlen; + uint8_t *p; + + assert(fr->tokenlen); + + if (outlen < len) { + return NGTCP2_ERR_NOBUF; + } + + p = out; + + *p++ = NGTCP2_FRAME_NEW_TOKEN; + + p = ngtcp2_put_uvarint(p, fr->tokenlen); + p = ngtcp2_cpymem(p, fr->token, fr->tokenlen); + + assert((size_t)(p - out) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize ngtcp2_pkt_encode_retire_connection_id_frame( + uint8_t *out, size_t outlen, const ngtcp2_retire_connection_id *fr) { + size_t len = 1 + ngtcp2_put_uvarintlen(fr->seq); + uint8_t *p; + + if (outlen < len) { + return NGTCP2_ERR_NOBUF; + } + + p = out; + + *p++ = NGTCP2_FRAME_RETIRE_CONNECTION_ID; + + p = ngtcp2_put_uvarint(p, fr->seq); + + assert((size_t)(p - out) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize +ngtcp2_pkt_encode_handshake_done_frame(uint8_t *out, size_t outlen, + const ngtcp2_handshake_done *fr) { + (void)fr; + + if (outlen < 1) { + return NGTCP2_ERR_NOBUF; + } + + *out++ = NGTCP2_FRAME_HANDSHAKE_DONE; + + return 1; +} + +ngtcp2_ssize ngtcp2_pkt_encode_datagram_frame(uint8_t *out, size_t outlen, + const ngtcp2_datagram *fr) { + uint64_t datalen = ngtcp2_vec_len(fr->data, fr->datacnt); + uint64_t len = + 1 + + (fr->type == NGTCP2_FRAME_DATAGRAM ? 0 : ngtcp2_put_uvarintlen(datalen)) + + datalen; + uint8_t *p; + size_t i; + + assert(fr->type == NGTCP2_FRAME_DATAGRAM || + fr->type == NGTCP2_FRAME_DATAGRAM_LEN); + + if (outlen < len) { + return NGTCP2_ERR_NOBUF; + } + + p = out; + + *p++ = fr->type; + if (fr->type == NGTCP2_FRAME_DATAGRAM_LEN) { + p = ngtcp2_put_uvarint(p, datalen); + } + + for (i = 0; i < fr->datacnt; ++i) { + assert(fr->data[i].len); + assert(fr->data[i].base); + p = ngtcp2_cpymem(p, fr->data[i].base, fr->data[i].len); + } + + assert((size_t)(p - out) == len); + + return (ngtcp2_ssize)len; +} + +ngtcp2_ssize ngtcp2_pkt_write_version_negotiation( + uint8_t *dest, size_t destlen, uint8_t unused_random, const uint8_t *dcid, + size_t dcidlen, const uint8_t *scid, size_t scidlen, const uint32_t *sv, + size_t nsv) { + size_t len = 1 + 4 + 1 + dcidlen + 1 + scidlen + nsv * 4; + uint8_t *p; + size_t i; + + assert(dcidlen < 256); + assert(scidlen < 256); + + if (destlen < len) { + return NGTCP2_ERR_NOBUF; + } + + p = dest; + + *p++ = 0x80 | unused_random; + p = ngtcp2_put_uint32be(p, 0); + *p++ = (uint8_t)dcidlen; + if (dcidlen) { + p = ngtcp2_cpymem(p, dcid, dcidlen); + } + *p++ = (uint8_t)scidlen; + if (scidlen) { + p = ngtcp2_cpymem(p, scid, scidlen); + } + + for (i = 0; i < nsv; ++i) { + p = ngtcp2_put_uint32be(p, sv[i]); + } + + assert((size_t)(p - dest) == len); + + return (ngtcp2_ssize)len; +} + +size_t ngtcp2_pkt_decode_version_negotiation(uint32_t *dest, + const uint8_t *payload, + size_t payloadlen) { + const uint8_t *end = payload + payloadlen; + + assert((payloadlen % sizeof(uint32_t)) == 0); + + for (; payload != end;) { + payload = ngtcp2_get_uint32(dest++, payload); + } + + return payloadlen / sizeof(uint32_t); +} + +int ngtcp2_pkt_decode_stateless_reset(ngtcp2_pkt_stateless_reset *sr, + const uint8_t *payload, + size_t payloadlen) { + const uint8_t *p = payload; + + if (payloadlen < + NGTCP2_MIN_STATELESS_RESET_RANDLEN + NGTCP2_STATELESS_RESET_TOKENLEN) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + sr->rand = p; + sr->randlen = payloadlen - NGTCP2_STATELESS_RESET_TOKENLEN; + p += sr->randlen; + memcpy(sr->stateless_reset_token, p, NGTCP2_STATELESS_RESET_TOKENLEN); + + return 0; +} + +int ngtcp2_pkt_decode_retry(ngtcp2_pkt_retry *dest, const uint8_t *payload, + size_t payloadlen) { + size_t len = /* token */ 1 + NGTCP2_RETRY_TAGLEN; + + if (payloadlen < len) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + dest->token = (uint8_t *)payload; + dest->tokenlen = (size_t)(payloadlen - NGTCP2_RETRY_TAGLEN); + ngtcp2_cpymem(dest->tag, payload + dest->tokenlen, NGTCP2_RETRY_TAGLEN); + + return 0; +} + +int64_t ngtcp2_pkt_adjust_pkt_num(int64_t max_pkt_num, int64_t pkt_num, + size_t n) { + int64_t expected = max_pkt_num + 1; + int64_t win = (int64_t)1 << n; + int64_t hwin = win / 2; + int64_t mask = win - 1; + int64_t cand = (expected & ~mask) | pkt_num; + + if (cand <= expected - hwin) { + assert(cand <= (int64_t)NGTCP2_MAX_VARINT - win); + return cand + win; + } + if (cand > expected + hwin && cand >= win) { + return cand - win; + } + return cand; +} + +int ngtcp2_pkt_validate_ack(ngtcp2_ack *fr) { + int64_t largest_ack = fr->largest_ack; + size_t i; + + if (largest_ack < (int64_t)fr->first_ack_range) { + return NGTCP2_ERR_ACK_FRAME; + } + + largest_ack -= (int64_t)fr->first_ack_range; + + for (i = 0; i < fr->rangecnt; ++i) { + if (largest_ack < (int64_t)fr->ranges[i].gap + 2) { + return NGTCP2_ERR_ACK_FRAME; + } + + largest_ack -= (int64_t)fr->ranges[i].gap + 2; + + if (largest_ack < (int64_t)fr->ranges[i].len) { + return NGTCP2_ERR_ACK_FRAME; + } + + largest_ack -= (int64_t)fr->ranges[i].len; + } + + return 0; +} + +ngtcp2_ssize +ngtcp2_pkt_write_stateless_reset(uint8_t *dest, size_t destlen, + const uint8_t *stateless_reset_token, + const uint8_t *rand, size_t randlen) { + uint8_t *p; + + if (destlen < + NGTCP2_MIN_STATELESS_RESET_RANDLEN + NGTCP2_STATELESS_RESET_TOKENLEN) { + return NGTCP2_ERR_NOBUF; + } + + if (randlen < NGTCP2_MIN_STATELESS_RESET_RANDLEN) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + p = dest; + + randlen = ngtcp2_min(destlen - NGTCP2_STATELESS_RESET_TOKENLEN, randlen); + + p = ngtcp2_cpymem(p, rand, randlen); + p = ngtcp2_cpymem(p, stateless_reset_token, NGTCP2_STATELESS_RESET_TOKENLEN); + *dest = (uint8_t)((*dest & 0x7fu) | 0x40u); + + return p - dest; +} + +ngtcp2_ssize ngtcp2_pkt_write_retry( + uint8_t *dest, size_t destlen, uint32_t version, const ngtcp2_cid *dcid, + const ngtcp2_cid *scid, const ngtcp2_cid *odcid, const uint8_t *token, + size_t tokenlen, ngtcp2_encrypt encrypt, const ngtcp2_crypto_aead *aead, + const ngtcp2_crypto_aead_ctx *aead_ctx) { + ngtcp2_pkt_hd hd; + uint8_t pseudo_retry[1500]; + ngtcp2_ssize pseudo_retrylen; + uint8_t tag[NGTCP2_RETRY_TAGLEN]; + int rv; + uint8_t *p; + size_t offset; + const uint8_t *nonce; + size_t noncelen; + + assert(tokenlen > 0); + assert(!ngtcp2_cid_eq(scid, odcid)); + + /* Retry packet is sent at most once per one connection attempt. In + the first connection attempt, client has to send random DCID + which is at least NGTCP2_MIN_INITIAL_DCIDLEN bytes long. */ + if (odcid->datalen < NGTCP2_MIN_INITIAL_DCIDLEN) { + return NGTCP2_ERR_INVALID_ARGUMENT; + } + + ngtcp2_pkt_hd_init(&hd, NGTCP2_PKT_FLAG_LONG_FORM, NGTCP2_PKT_RETRY, dcid, + scid, /* pkt_num = */ 0, /* pkt_numlen = */ 1, version, + /* len = */ 0); + + pseudo_retrylen = + ngtcp2_pkt_encode_pseudo_retry(pseudo_retry, sizeof(pseudo_retry), &hd, + /* unused = */ 0, odcid, token, tokenlen); + if (pseudo_retrylen < 0) { + return pseudo_retrylen; + } + + switch (version) { + case NGTCP2_PROTO_VER_V1: + nonce = (const uint8_t *)NGTCP2_RETRY_NONCE_V1; + noncelen = sizeof(NGTCP2_RETRY_NONCE_V1) - 1; + break; + case NGTCP2_PROTO_VER_V2: + nonce = (const uint8_t *)NGTCP2_RETRY_NONCE_V2; + noncelen = sizeof(NGTCP2_RETRY_NONCE_V2) - 1; + break; + default: + nonce = (const uint8_t *)NGTCP2_RETRY_NONCE_DRAFT; + noncelen = sizeof(NGTCP2_RETRY_NONCE_DRAFT) - 1; + } + + /* OpenSSL does not like NULL plaintext. */ + rv = encrypt(tag, aead, aead_ctx, (const uint8_t *)"", 0, nonce, noncelen, + pseudo_retry, (size_t)pseudo_retrylen); + if (rv != 0) { + return rv; + } + + offset = 1 + odcid->datalen; + if (destlen < (size_t)pseudo_retrylen + sizeof(tag) - offset) { + return NGTCP2_ERR_NOBUF; + } + + p = ngtcp2_cpymem(dest, pseudo_retry + offset, + (size_t)pseudo_retrylen - offset); + p = ngtcp2_cpymem(p, tag, sizeof(tag)); + + return p - dest; +} + +ngtcp2_ssize ngtcp2_pkt_encode_pseudo_retry( + uint8_t *dest, size_t destlen, const ngtcp2_pkt_hd *hd, uint8_t unused, + const ngtcp2_cid *odcid, const uint8_t *token, size_t tokenlen) { + uint8_t *p = dest; + ngtcp2_ssize nwrite; + + if (destlen < 1 + odcid->datalen) { + return NGTCP2_ERR_NOBUF; + } + + *p++ = (uint8_t)odcid->datalen; + p = ngtcp2_cpymem(p, odcid->data, odcid->datalen); + destlen -= (size_t)(p - dest); + + nwrite = ngtcp2_pkt_encode_hd_long(p, destlen, hd); + if (nwrite < 0) { + return nwrite; + } + + if (destlen < (size_t)nwrite + tokenlen) { + return NGTCP2_ERR_NOBUF; + } + + *p &= 0xf0; + *p |= unused; + + p += nwrite; + + p = ngtcp2_cpymem(p, token, tokenlen); + + return p - dest; +} + +int ngtcp2_pkt_verify_retry_tag(uint32_t version, const ngtcp2_pkt_retry *retry, + const uint8_t *pkt, size_t pktlen, + ngtcp2_encrypt encrypt, + const ngtcp2_crypto_aead *aead, + const ngtcp2_crypto_aead_ctx *aead_ctx) { + uint8_t pseudo_retry[1500]; + size_t pseudo_retrylen; + uint8_t *p = pseudo_retry; + int rv; + uint8_t tag[NGTCP2_RETRY_TAGLEN]; + const uint8_t *nonce; + size_t noncelen; + + assert(pktlen >= sizeof(retry->tag)); + + if (sizeof(pseudo_retry) < + 1 + retry->odcid.datalen + pktlen - sizeof(retry->tag)) { + return NGTCP2_ERR_PROTO; + } + + *p++ = (uint8_t)retry->odcid.datalen; + p = ngtcp2_cpymem(p, retry->odcid.data, retry->odcid.datalen); + p = ngtcp2_cpymem(p, pkt, pktlen - sizeof(retry->tag)); + + pseudo_retrylen = (size_t)(p - pseudo_retry); + + switch (version) { + case NGTCP2_PROTO_VER_V1: + nonce = (const uint8_t *)NGTCP2_RETRY_NONCE_V1; + noncelen = sizeof(NGTCP2_RETRY_NONCE_V1) - 1; + break; + case NGTCP2_PROTO_VER_V2: + nonce = (const uint8_t *)NGTCP2_RETRY_NONCE_V2; + noncelen = sizeof(NGTCP2_RETRY_NONCE_V2) - 1; + break; + default: + nonce = (const uint8_t *)NGTCP2_RETRY_NONCE_DRAFT; + noncelen = sizeof(NGTCP2_RETRY_NONCE_DRAFT) - 1; + } + + /* OpenSSL does not like NULL plaintext. */ + rv = encrypt(tag, aead, aead_ctx, (const uint8_t *)"", 0, nonce, noncelen, + pseudo_retry, pseudo_retrylen); + if (rv != 0) { + return rv; + } + + if (0 != memcmp(retry->tag, tag, sizeof(retry->tag))) { + return NGTCP2_ERR_PROTO; + } + + return 0; +} + +size_t ngtcp2_pkt_stream_max_datalen(int64_t stream_id, uint64_t offset, + uint64_t len, size_t left) { + size_t n = 1 /* type */ + ngtcp2_put_uvarintlen((uint64_t)stream_id) + + (offset ? ngtcp2_put_uvarintlen(offset) : 0); + + if (left <= n) { + return (size_t)-1; + } + + left -= n; + + if (left > 8 + 1073741823 && len > 1073741823) { +#if SIZE_MAX > UINT32_MAX + len = ngtcp2_min(len, 4611686018427387903lu); +#endif /* SIZE_MAX > UINT32_MAX */ + return (size_t)ngtcp2_min(len, (uint64_t)(left - 8)); + } + + if (left > 4 + 16383 && len > 16383) { + len = ngtcp2_min(len, 1073741823); + return (size_t)ngtcp2_min(len, (uint64_t)(left - 4)); + } + + if (left > 2 + 63 && len > 63) { + len = ngtcp2_min(len, 16383); + return (size_t)ngtcp2_min(len, (uint64_t)(left - 2)); + } + + len = ngtcp2_min(len, 63); + return (size_t)ngtcp2_min(len, (uint64_t)(left - 1)); +} + +size_t ngtcp2_pkt_crypto_max_datalen(uint64_t offset, size_t len, size_t left) { + size_t n = 1 /* type */ + ngtcp2_put_uvarintlen(offset); + + /* CRYPTO frame must contain nonzero length data. Return -1 if + there is no space to write crypto data. */ + if (left <= n + 1) { + return (size_t)-1; + } + + left -= n; + + if (left > 8 + 1073741823 && len > 1073741823) { +#if SIZE_MAX > UINT32_MAX + len = ngtcp2_min(len, 4611686018427387903lu); +#endif /* SIZE_MAX > UINT32_MAX */ + return ngtcp2_min(len, left - 8); + } + + if (left > 4 + 16383 && len > 16383) { + len = ngtcp2_min(len, 1073741823); + return ngtcp2_min(len, left - 4); + } + + if (left > 2 + 63 && len > 63) { + len = ngtcp2_min(len, 16383); + return ngtcp2_min(len, left - 2); + } + + len = ngtcp2_min(len, 63); + return ngtcp2_min(len, left - 1); +} + +size_t ngtcp2_pkt_datagram_framelen(size_t len) { + return 1 /* type */ + ngtcp2_put_uvarintlen(len) + len; +} + +int ngtcp2_is_supported_version(uint32_t version) { + switch (version) { + case NGTCP2_PROTO_VER_V1: + case NGTCP2_PROTO_VER_V2: + return 1; + default: + return NGTCP2_PROTO_VER_DRAFT_MIN <= version && + version <= NGTCP2_PROTO_VER_DRAFT_MAX; + } +} + +int ngtcp2_is_reserved_version(uint32_t version) { + return (version & NGTCP2_RESERVED_VERSION_MASK) == + NGTCP2_RESERVED_VERSION_MASK; +} + +uint8_t ngtcp2_pkt_get_type_long(uint32_t version, uint8_t c) { + uint8_t pkt_type = (uint8_t)((c & NGTCP2_LONG_TYPE_MASK) >> 4); + + switch (version) { + case NGTCP2_PROTO_VER_V2: + switch (pkt_type) { + case NGTCP2_PKT_TYPE_INITIAL_V2: + return NGTCP2_PKT_INITIAL; + case NGTCP2_PKT_TYPE_0RTT_V2: + return NGTCP2_PKT_0RTT; + case NGTCP2_PKT_TYPE_HANDSHAKE_V2: + return NGTCP2_PKT_HANDSHAKE; + case NGTCP2_PKT_TYPE_RETRY_V2: + return NGTCP2_PKT_RETRY; + default: + return 0; + } + default: + if (!ngtcp2_is_supported_version(version)) { + return 0; + } + + /* QUIC v1 and draft versions share the same numeric packet + types. */ + switch (pkt_type) { + case NGTCP2_PKT_TYPE_INITIAL_V1: + return NGTCP2_PKT_INITIAL; + case NGTCP2_PKT_TYPE_0RTT_V1: + return NGTCP2_PKT_0RTT; + case NGTCP2_PKT_TYPE_HANDSHAKE_V1: + return NGTCP2_PKT_HANDSHAKE; + case NGTCP2_PKT_TYPE_RETRY_V1: + return NGTCP2_PKT_RETRY; + default: + return 0; + } + } +} + +uint8_t ngtcp2_pkt_versioned_type(uint32_t version, uint32_t pkt_type) { + switch (version) { + case NGTCP2_PROTO_VER_V2: + switch (pkt_type) { + case NGTCP2_PKT_INITIAL: + return NGTCP2_PKT_TYPE_INITIAL_V2; + case NGTCP2_PKT_0RTT: + return NGTCP2_PKT_TYPE_0RTT_V2; + case NGTCP2_PKT_HANDSHAKE: + return NGTCP2_PKT_TYPE_HANDSHAKE_V2; + case NGTCP2_PKT_RETRY: + return NGTCP2_PKT_TYPE_RETRY_V2; + default: + ngtcp2_unreachable(); + } + default: + /* Assume that unsupported versions share the numeric long packet + types with QUIC v1 in order to send a packet to elicit Version + Negotiation packet. */ + + /* QUIC v1 and draft versions share the same numeric packet + types. */ + switch (pkt_type) { + case NGTCP2_PKT_INITIAL: + return NGTCP2_PKT_TYPE_INITIAL_V1; + case NGTCP2_PKT_0RTT: + return NGTCP2_PKT_TYPE_0RTT_V1; + case NGTCP2_PKT_HANDSHAKE: + return NGTCP2_PKT_TYPE_HANDSHAKE_V1; + case NGTCP2_PKT_RETRY: + return NGTCP2_PKT_TYPE_RETRY_V1; + default: + ngtcp2_unreachable(); + } + } +} + +int ngtcp2_pkt_verify_reserved_bits(uint8_t c) { + if (c & NGTCP2_HEADER_FORM_BIT) { + return (c & NGTCP2_LONG_RESERVED_BIT_MASK) == 0 ? 0 : NGTCP2_ERR_PROTO; + } + return (c & NGTCP2_SHORT_RESERVED_BIT_MASK) == 0 ? 0 : NGTCP2_ERR_PROTO; +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pkt.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pkt.h new file mode 100644 index 0000000..9db62b0 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pkt.h @@ -0,0 +1,1235 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_PKT_H +#define NGTCP2_PKT_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +/* QUIC header macros */ +#define NGTCP2_HEADER_FORM_BIT 0x80 +#define NGTCP2_FIXED_BIT_MASK 0x40 +#define NGTCP2_PKT_NUMLEN_MASK 0x03 + +/* Long header specific macros */ +#define NGTCP2_LONG_TYPE_MASK 0x30 +#define NGTCP2_LONG_RESERVED_BIT_MASK 0x0c + +/* Short header specific macros */ +#define NGTCP2_SHORT_SPIN_BIT_MASK 0x20 +#define NGTCP2_SHORT_RESERVED_BIT_MASK 0x18 +#define NGTCP2_SHORT_KEY_PHASE_BIT 0x04 + +/* NGTCP2_SR_TYPE is a Type field of Stateless Reset. */ +#define NGTCP2_SR_TYPE 0x1f + +/* NGTCP2_MIN_LONG_HEADERLEN is the minimum length of long header. + That is (1|1|TT|RR|PP)<1> + VERSION<4> + DCIL<1> + SCIL<1> + + LENGTH<1> + PKN<1> */ +#define NGTCP2_MIN_LONG_HEADERLEN (1 + 4 + 1 + 1 + 1 + 1) + +#define NGTCP2_STREAM_FIN_BIT 0x01 +#define NGTCP2_STREAM_LEN_BIT 0x02 +#define NGTCP2_STREAM_OFF_BIT 0x04 + +/* NGTCP2_STREAM_OVERHEAD is the maximum number of bytes required + other than payload for STREAM frame. That is from type field to + the beginning of the payload. */ +#define NGTCP2_STREAM_OVERHEAD (1 + 8 + 8 + 8) + +/* NGTCP2_CRYPTO_OVERHEAD is the maximum number of bytes required + other than payload for CRYPTO frame. That is from type field to + the beginning of the payload. */ +#define NGTCP2_CRYPTO_OVERHEAD (1 + 8 + 8) + +/* NGTCP2_DATAGRAM_OVERHEAD is the maximum number of bytes required + other than payload for DATAGRAM frame. That is from type field to + the beginning of the payload. */ +#define NGTCP2_DATAGRAM_OVERHEAD (1 + 8) + +/* NGTCP2_MIN_FRAME_PAYLOADLEN is the minimum frame payload length. */ +#define NGTCP2_MIN_FRAME_PAYLOADLEN 16 + +/* NGTCP2_MAX_SERVER_STREAM_ID_BIDI is the maximum bidirectional + server stream ID. */ +#define NGTCP2_MAX_SERVER_STREAM_ID_BIDI ((int64_t)0x3ffffffffffffffdll) +/* NGTCP2_MAX_CLIENT_STREAM_ID_BIDI is the maximum bidirectional + client stream ID. */ +#define NGTCP2_MAX_CLIENT_STREAM_ID_BIDI ((int64_t)0x3ffffffffffffffcll) +/* NGTCP2_MAX_SERVER_STREAM_ID_UNI is the maximum unidirectional + server stream ID. */ +#define NGTCP2_MAX_SERVER_STREAM_ID_UNI ((int64_t)0x3fffffffffffffffll) +/* NGTCP2_MAX_CLIENT_STREAM_ID_UNI is the maximum unidirectional + client stream ID. */ +#define NGTCP2_MAX_CLIENT_STREAM_ID_UNI ((int64_t)0x3ffffffffffffffell) + +/* NGTCP2_MAX_NUM_ACK_RANGES is the maximum number of Additional ACK + ranges which this library can create, or decode. */ +#define NGTCP2_MAX_ACK_RANGES 32 + +/* NGTCP2_MAX_PKT_NUM is the maximum packet number. */ +#define NGTCP2_MAX_PKT_NUM ((int64_t)((1ll << 62) - 1)) + +/* NGTCP2_MIN_PKT_EXPANDLEN is the minimum packet size expansion in + addition to the minimum DCID length to hide/trigger Stateless + Reset. */ +#define NGTCP2_MIN_PKT_EXPANDLEN 22 + +/* NGTCP2_RETRY_TAGLEN is the length of Retry packet integrity tag. */ +#define NGTCP2_RETRY_TAGLEN 16 + +/* NGTCP2_HARD_MAX_UDP_PAYLOAD_SIZE is the maximum UDP payload size + that this library can write. */ +#define NGTCP2_HARD_MAX_UDP_PAYLOAD_SIZE ((1 << 24) - 1) + +/* NGTCP2_PKT_LENGTHLEN is the number of bytes that is occupied by + Length field in Long packet header. */ +#define NGTCP2_PKT_LENGTHLEN 4 + +/* NGTCP2_PKT_TYPE_INITIAL_V1 is Initial long header packet type for + QUIC v1. */ +#define NGTCP2_PKT_TYPE_INITIAL_V1 0x0 +/* NGTCP2_PKT_TYPE_0RTT_V1 is 0RTT long header packet type for QUIC + v1. */ +#define NGTCP2_PKT_TYPE_0RTT_V1 0x1 +/* NGTCP2_PKT_TYPE_HANDSHAKE_V1 is Handshake long header packet type + for QUIC v1. */ +#define NGTCP2_PKT_TYPE_HANDSHAKE_V1 0x2 +/* NGTCP2_PKT_TYPE_RETRY_V1 is Retry long header packet type for QUIC + v1. */ +#define NGTCP2_PKT_TYPE_RETRY_V1 0x3 + +/* NGTCP2_PKT_TYPE_INITIAL_V2 is Initial long header packet type for + QUIC v2. */ +#define NGTCP2_PKT_TYPE_INITIAL_V2 0x1 +/* NGTCP2_PKT_TYPE_0RTT_V2 is 0RTT long header packet type for QUIC + v2. */ +#define NGTCP2_PKT_TYPE_0RTT_V2 0x2 +/* NGTCP2_PKT_TYPE_HANDSHAKE_V2 is Handshake long header packet type + for QUIC v2. */ +#define NGTCP2_PKT_TYPE_HANDSHAKE_V2 0x3 +/* NGTCP2_PKT_TYPE_RETRY_V2 is Retry long header packet type for QUIC + v2. */ +#define NGTCP2_PKT_TYPE_RETRY_V2 0x0 + +typedef struct ngtcp2_pkt_retry { + ngtcp2_cid odcid; + uint8_t *token; + size_t tokenlen; + uint8_t tag[NGTCP2_RETRY_TAGLEN]; +} ngtcp2_pkt_retry; + +typedef enum { + NGTCP2_FRAME_PADDING = 0x00, + NGTCP2_FRAME_PING = 0x01, + NGTCP2_FRAME_ACK = 0x02, + NGTCP2_FRAME_ACK_ECN = 0x03, + NGTCP2_FRAME_RESET_STREAM = 0x04, + NGTCP2_FRAME_STOP_SENDING = 0x05, + NGTCP2_FRAME_CRYPTO = 0x06, + NGTCP2_FRAME_NEW_TOKEN = 0x07, + NGTCP2_FRAME_STREAM = 0x08, + NGTCP2_FRAME_MAX_DATA = 0x10, + NGTCP2_FRAME_MAX_STREAM_DATA = 0x11, + NGTCP2_FRAME_MAX_STREAMS_BIDI = 0x12, + NGTCP2_FRAME_MAX_STREAMS_UNI = 0x13, + NGTCP2_FRAME_DATA_BLOCKED = 0x14, + NGTCP2_FRAME_STREAM_DATA_BLOCKED = 0x15, + NGTCP2_FRAME_STREAMS_BLOCKED_BIDI = 0x16, + NGTCP2_FRAME_STREAMS_BLOCKED_UNI = 0x17, + NGTCP2_FRAME_NEW_CONNECTION_ID = 0x18, + NGTCP2_FRAME_RETIRE_CONNECTION_ID = 0x19, + NGTCP2_FRAME_PATH_CHALLENGE = 0x1a, + NGTCP2_FRAME_PATH_RESPONSE = 0x1b, + NGTCP2_FRAME_CONNECTION_CLOSE = 0x1c, + NGTCP2_FRAME_CONNECTION_CLOSE_APP = 0x1d, + NGTCP2_FRAME_HANDSHAKE_DONE = 0x1e, + NGTCP2_FRAME_DATAGRAM = 0x30, + NGTCP2_FRAME_DATAGRAM_LEN = 0x31, +} ngtcp2_frame_type; + +typedef struct ngtcp2_stream { + uint8_t type; + /** + * flags of decoded STREAM frame. This gets ignored when encoding + * STREAM frame. + */ + uint8_t flags; + uint8_t fin; + int64_t stream_id; + uint64_t offset; + /* datacnt is the number of elements that data contains. Although + the length of data is 1 in this definition, the library may + allocate extra bytes to hold more elements. */ + size_t datacnt; + /* data is the array of ngtcp2_vec which references data. */ + ngtcp2_vec data[1]; +} ngtcp2_stream; + +typedef struct ngtcp2_ack_range { + uint64_t gap; + uint64_t len; +} ngtcp2_ack_range; + +typedef struct ngtcp2_ack { + uint8_t type; + int64_t largest_ack; + uint64_t ack_delay; + /** + * ack_delay_unscaled is an ack_delay multiplied by + * 2**ack_delay_component * NGTCP2_MICROSECONDS. + */ + ngtcp2_duration ack_delay_unscaled; + struct { + uint64_t ect0; + uint64_t ect1; + uint64_t ce; + } ecn; + uint64_t first_ack_range; + size_t rangecnt; + ngtcp2_ack_range ranges[1]; +} ngtcp2_ack; + +typedef struct ngtcp2_padding { + uint8_t type; + /** + * The length of contiguous PADDING frames. + */ + size_t len; +} ngtcp2_padding; + +typedef struct ngtcp2_reset_stream { + uint8_t type; + int64_t stream_id; + uint64_t app_error_code; + uint64_t final_size; +} ngtcp2_reset_stream; + +typedef struct ngtcp2_connection_close { + uint8_t type; + uint64_t error_code; + uint64_t frame_type; + size_t reasonlen; + uint8_t *reason; +} ngtcp2_connection_close; + +typedef struct ngtcp2_max_data { + uint8_t type; + /** + * max_data is Maximum Data. + */ + uint64_t max_data; +} ngtcp2_max_data; + +typedef struct ngtcp2_max_stream_data { + uint8_t type; + int64_t stream_id; + uint64_t max_stream_data; +} ngtcp2_max_stream_data; + +typedef struct ngtcp2_max_streams { + uint8_t type; + uint64_t max_streams; +} ngtcp2_max_streams; + +typedef struct ngtcp2_ping { + uint8_t type; +} ngtcp2_ping; + +typedef struct ngtcp2_data_blocked { + uint8_t type; + uint64_t offset; +} ngtcp2_data_blocked; + +typedef struct ngtcp2_stream_data_blocked { + uint8_t type; + int64_t stream_id; + uint64_t offset; +} ngtcp2_stream_data_blocked; + +typedef struct ngtcp2_streams_blocked { + uint8_t type; + uint64_t max_streams; +} ngtcp2_streams_blocked; + +typedef struct ngtcp2_new_connection_id { + uint8_t type; + uint64_t seq; + uint64_t retire_prior_to; + ngtcp2_cid cid; + uint8_t stateless_reset_token[NGTCP2_STATELESS_RESET_TOKENLEN]; +} ngtcp2_new_connection_id; + +typedef struct ngtcp2_stop_sending { + uint8_t type; + int64_t stream_id; + uint64_t app_error_code; +} ngtcp2_stop_sending; + +typedef struct ngtcp2_path_challenge { + uint8_t type; + uint8_t data[NGTCP2_PATH_CHALLENGE_DATALEN]; +} ngtcp2_path_challenge; + +typedef struct ngtcp2_path_response { + uint8_t type; + uint8_t data[NGTCP2_PATH_CHALLENGE_DATALEN]; +} ngtcp2_path_response; + +typedef struct ngtcp2_crypto { + uint8_t type; + uint64_t offset; + /* datacnt is the number of elements that data contains. Although + the length of data is 1 in this definition, the library may + allocate extra bytes to hold more elements. */ + size_t datacnt; + /* data is the array of ngtcp2_vec which references data. */ + ngtcp2_vec data[1]; +} ngtcp2_crypto; + +typedef struct ngtcp2_new_token { + uint8_t type; + uint8_t *token; + size_t tokenlen; +} ngtcp2_new_token; + +typedef struct ngtcp2_retire_connection_id { + uint8_t type; + uint64_t seq; +} ngtcp2_retire_connection_id; + +typedef struct ngtcp2_handshake_done { + uint8_t type; +} ngtcp2_handshake_done; + +typedef struct ngtcp2_datagram { + uint8_t type; + /* dgram_id is an opaque identifier chosen by an application. */ + uint64_t dgram_id; + /* datacnt is the number of elements that data contains. */ + size_t datacnt; + /* data is a pointer to ngtcp2_vec array that stores data. */ + ngtcp2_vec *data; + /* rdata is conveniently embedded to ngtcp2_datagram, so that data + field can just point to the address of this field to store a + single vector which is the case when DATAGRAM is received from a + remote endpoint. */ + ngtcp2_vec rdata[1]; +} ngtcp2_datagram; + +typedef union ngtcp2_frame { + uint8_t type; + ngtcp2_stream stream; + ngtcp2_ack ack; + ngtcp2_padding padding; + ngtcp2_reset_stream reset_stream; + ngtcp2_connection_close connection_close; + ngtcp2_max_data max_data; + ngtcp2_max_stream_data max_stream_data; + ngtcp2_max_streams max_streams; + ngtcp2_ping ping; + ngtcp2_data_blocked data_blocked; + ngtcp2_stream_data_blocked stream_data_blocked; + ngtcp2_streams_blocked streams_blocked; + ngtcp2_new_connection_id new_connection_id; + ngtcp2_stop_sending stop_sending; + ngtcp2_path_challenge path_challenge; + ngtcp2_path_response path_response; + ngtcp2_crypto crypto; + ngtcp2_new_token new_token; + ngtcp2_retire_connection_id retire_connection_id; + ngtcp2_handshake_done handshake_done; + ngtcp2_datagram datagram; +} ngtcp2_frame; + +typedef struct ngtcp2_pkt_chain ngtcp2_pkt_chain; + +/* + * ngtcp2_pkt_chain is the chain of incoming packets buffered. + */ +struct ngtcp2_pkt_chain { + ngtcp2_path_storage path; + ngtcp2_pkt_info pi; + ngtcp2_pkt_chain *next; + uint8_t *pkt; + /* pktlen is length of a QUIC packet. */ + size_t pktlen; + /* dgramlen is length of UDP datagram that a QUIC packet is + included. */ + size_t dgramlen; + ngtcp2_tstamp ts; +}; + +/* + * ngtcp2_pkt_chain_new allocates ngtcp2_pkt_chain objects, and + * assigns its pointer to |*ppc|. The content of buffer pointed by + * |pkt| of length |pktlen| is copied into |*ppc|. The packet is + * obtained via the network |path|. The values of path->local and + * path->remote are copied into |*ppc|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +int ngtcp2_pkt_chain_new(ngtcp2_pkt_chain **ppc, const ngtcp2_path *path, + const ngtcp2_pkt_info *pi, const uint8_t *pkt, + size_t pktlen, size_t dgramlen, ngtcp2_tstamp ts, + const ngtcp2_mem *mem); + +/* + * ngtcp2_pkt_chain_del deallocates |pc|. It also frees the memory + * pointed by |pc|. + */ +void ngtcp2_pkt_chain_del(ngtcp2_pkt_chain *pc, const ngtcp2_mem *mem); + +/* + * ngtcp2_pkt_hd_init initializes |hd| with the given values. If + * |dcid| and/or |scid| is NULL, DCID and SCID of |hd| is empty + * respectively. |pkt_numlen| is the number of bytes used to encode + * |pkt_num| and either 1, 2, or 4. |version| is QUIC version for + * long header. |len| is the length field of Initial, 0RTT, and + * Handshake packets. + */ +void ngtcp2_pkt_hd_init(ngtcp2_pkt_hd *hd, uint8_t flags, uint8_t type, + const ngtcp2_cid *dcid, const ngtcp2_cid *scid, + int64_t pkt_num, size_t pkt_numlen, uint32_t version, + size_t len); + +/* + * ngtcp2_pkt_encode_hd_long encodes |hd| as QUIC long header into + * |out| which has length |outlen|. It returns the number of bytes + * written into |outlen| if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOBUF + * Buffer is too short + */ +ngtcp2_ssize ngtcp2_pkt_encode_hd_long(uint8_t *out, size_t outlen, + const ngtcp2_pkt_hd *hd); + +/* + * ngtcp2_pkt_encode_hd_short encodes |hd| as QUIC short header into + * |out| which has length |outlen|. It returns the number of bytes + * written into |outlen| if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOBUF + * Buffer is too short + */ +ngtcp2_ssize ngtcp2_pkt_encode_hd_short(uint8_t *out, size_t outlen, + const ngtcp2_pkt_hd *hd); + +/** + * @function + * + * `ngtcp2_pkt_decode_frame` decodes a QUIC frame from the buffer + * pointed by |payload| whose length is |payloadlen|. + * + * This function returns the number of bytes read to decode a single + * frame if it succeeds, or one of the following negative error codes: + * + * :enum:`NGTCP2_ERR_FRAME_ENCODING` + * Frame is badly formatted; or frame type is unknown; or + * |payloadlen| is 0. + */ +ngtcp2_ssize ngtcp2_pkt_decode_frame(ngtcp2_frame *dest, const uint8_t *payload, + size_t payloadlen); + +/** + * @function + * + * `ngtcp2_pkt_encode_frame` encodes a frame |fm| into the buffer + * pointed by |out| of length |outlen|. + * + * This function returns the number of bytes written to the buffer, or + * one of the following negative error codes: + * + * :enum:`NGTCP2_ERR_NOBUF` + * Buffer does not have enough capacity to write a frame. + */ +ngtcp2_ssize ngtcp2_pkt_encode_frame(uint8_t *out, size_t outlen, + ngtcp2_frame *fr); + +/* + * ngtcp2_pkt_decode_version_negotiation decodes Version Negotiation + * packet payload |payload| of length |payloadlen|, and stores the + * result in |dest|. |dest| must have enough capacity to store the + * result. |payloadlen| also must be a multiple of sizeof(uint32_t). + * + * This function returns the number of versions written in |dest|. + */ +size_t ngtcp2_pkt_decode_version_negotiation(uint32_t *dest, + const uint8_t *payload, + size_t payloadlen); + +/* + * ngtcp2_pkt_decode_stateless_reset decodes Stateless Reset payload + * |payload| of length |payloadlen|. The |payload| must start with + * Stateless Reset Token. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_INVALID_ARGUMENT + * Payloadlen is too short. + */ +int ngtcp2_pkt_decode_stateless_reset(ngtcp2_pkt_stateless_reset *sr, + const uint8_t *payload, + size_t payloadlen); + +/* + * ngtcp2_pkt_decode_retry decodes Retry packet payload |payload| of + * length |payloadlen|. The |payload| must start with Retry token + * field. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_INVALID_ARGUMENT + * Payloadlen is too short. + */ +int ngtcp2_pkt_decode_retry(ngtcp2_pkt_retry *dest, const uint8_t *payload, + size_t payloadlen); + +/* + * ngtcp2_pkt_decode_stream_frame decodes STREAM frame from |payload| + * of length |payloadlen|. The result is stored in the object pointed + * by |dest|. STREAM frame must start at payload[0]. This function + * finishes when it decodes one STREAM frame, and returns the exact + * number of bytes read to decode a frame if it succeeds, or one of + * the following negative error codes: + * + * NGTCP2_ERR_FRAME_ENCODING + * Payload is too short to include STREAM frame. + */ +ngtcp2_ssize ngtcp2_pkt_decode_stream_frame(ngtcp2_stream *dest, + const uint8_t *payload, + size_t payloadlen); + +/* + * ngtcp2_pkt_decode_ack_frame decodes ACK frame from |payload| of + * length |payloadlen|. The result is stored in the object pointed by + * |dest|. ACK frame must start at payload[0]. This function + * finishes when it decodes one ACK frame, and returns the exact + * number of bytes read to decode a frame if it succeeds, or one of + * the following negative error codes: + * + * NGTCP2_ERR_FRAME_ENCODING + * Payload is too short to include ACK frame. + */ +ngtcp2_ssize ngtcp2_pkt_decode_ack_frame(ngtcp2_ack *dest, + const uint8_t *payload, + size_t payloadlen); + +/* + * ngtcp2_pkt_decode_padding_frame decodes contiguous PADDING frames + * from |payload| of length |payloadlen|. It continues to parse + * frames as long as the frame type is PADDING. This finishes when it + * encounters the frame type which is not PADDING, or all input data + * is read. The first byte (payload[0]) must be NGTCP2_FRAME_PADDING. + * This function returns the exact number of bytes read to decode + * PADDING frames. + */ +ngtcp2_ssize ngtcp2_pkt_decode_padding_frame(ngtcp2_padding *dest, + const uint8_t *payload, + size_t payloadlen); + +/* + * ngtcp2_pkt_decode_reset_stream_frame decodes RESET_STREAM frame + * from |payload| of length |payloadlen|. The result is stored in the + * object pointed by |dest|. RESET_STREAM frame must start at + * payload[0]. This function finishes when it decodes one + * RESET_STREAM frame, and returns the exact number of bytes read to + * decode a frame if it succeeds, or one of the following negative + * error codes: + * + * NGTCP2_ERR_FRAME_ENCODING + * Payload is too short to include RESET_STREAM frame. + */ +ngtcp2_ssize ngtcp2_pkt_decode_reset_stream_frame(ngtcp2_reset_stream *dest, + const uint8_t *payload, + size_t payloadlen); + +/* + * ngtcp2_pkt_decode_connection_close_frame decodes CONNECTION_CLOSE + * frame from |payload| of length |payloadlen|. The result is stored + * in the object pointed by |dest|. CONNECTION_CLOSE frame must start + * at payload[0]. This function finishes it decodes one + * CONNECTION_CLOSE frame, and returns the exact number of bytes read + * to decode a frame if it succeeds, or one of the following negative + * error codes: + * + * NGTCP2_ERR_FRAME_ENCODING + * Payload is too short to include CONNECTION_CLOSE frame. + */ +ngtcp2_ssize ngtcp2_pkt_decode_connection_close_frame( + ngtcp2_connection_close *dest, const uint8_t *payload, size_t payloadlen); + +/* + * ngtcp2_pkt_decode_max_data_frame decodes MAX_DATA frame from + * |payload| of length |payloadlen|. The result is stored in the + * object pointed by |dest|. MAX_DATA frame must start at payload[0]. + * This function finishes when it decodes one MAX_DATA frame, and + * returns the exact number of bytes read to decode a frame if it + * succeeds, or one of the following negative error codes: + * + * NGTCP2_ERR_FRAME_ENCODING + * Payload is too short to include MAX_DATA frame. + */ +ngtcp2_ssize ngtcp2_pkt_decode_max_data_frame(ngtcp2_max_data *dest, + const uint8_t *payload, + size_t payloadlen); + +/* + * ngtcp2_pkt_decode_max_stream_data_frame decodes MAX_STREAM_DATA + * frame from |payload| of length |payloadlen|. The result is stored + * in the object pointed by |dest|. MAX_STREAM_DATA frame must start + * at payload[0]. This function finishes when it decodes one + * MAX_STREAM_DATA frame, and returns the exact number of bytes read + * to decode a frame if it succeeds, or one of the following negative + * error codes: + * + * NGTCP2_ERR_FRAME_ENCODING + * Payload is too short to include MAX_STREAM_DATA frame. + */ +ngtcp2_ssize ngtcp2_pkt_decode_max_stream_data_frame( + ngtcp2_max_stream_data *dest, const uint8_t *payload, size_t payloadlen); + +/* + * ngtcp2_pkt_decode_max_streams_frame decodes MAX_STREAMS frame from + * |payload| of length |payloadlen|. The result is stored in the + * object pointed by |dest|. MAX_STREAMS frame must start at + * payload[0]. This function finishes when it decodes one MAX_STREAMS + * frame, and returns the exact number of bytes read to decode a frame + * if it succeeds, or one of the following negative error codes: + * + * NGTCP2_ERR_FRAME_ENCODING + * Payload is too short to include MAX_STREAMS frame. + */ +ngtcp2_ssize ngtcp2_pkt_decode_max_streams_frame(ngtcp2_max_streams *dest, + const uint8_t *payload, + size_t payloadlen); + +/* + * ngtcp2_pkt_decode_ping_frame decodes PING frame from |payload| of + * length |payloadlen|. The result is stored in the object pointed by + * |dest|. PING frame must start at payload[0]. This function + * finishes when it decodes one PING frame, and returns the exact + * number of bytes read to decode a frame. + */ +ngtcp2_ssize ngtcp2_pkt_decode_ping_frame(ngtcp2_ping *dest, + const uint8_t *payload, + size_t payloadlen); + +/* + * ngtcp2_pkt_decode_data_blocked_frame decodes DATA_BLOCKED frame + * from |payload| of length |payloadlen|. The result is stored in the + * object pointed by |dest|. DATA_BLOCKED frame must start at + * payload[0]. This function finishes when it decodes one + * DATA_BLOCKED frame, and returns the exact number of bytes read to + * decode a frame if it succeeds, or one of the following negative + * error codes: + * + * NGTCP2_ERR_FRAME_ENCODING + * Payload is too short to include DATA_BLOCKED frame. + */ +ngtcp2_ssize ngtcp2_pkt_decode_data_blocked_frame(ngtcp2_data_blocked *dest, + const uint8_t *payload, + size_t payloadlen); + +/* + * ngtcp2_pkt_decode_stream_data_blocked_frame decodes + * STREAM_DATA_BLOCKED frame from |payload| of length |payloadlen|. + * The result is stored in the object pointed by |dest|. + * STREAM_DATA_BLOCKED frame must start at payload[0]. This function + * finishes when it decodes one STREAM_DATA_BLOCKED frame, and returns + * the exact number of bytes read to decode a frame if it succeeds, or + * one of the following negative error codes: + * + * NGTCP2_ERR_FRAME_ENCODING + * Payload is too short to include STREAM_DATA_BLOCKED frame. + */ +ngtcp2_ssize +ngtcp2_pkt_decode_stream_data_blocked_frame(ngtcp2_stream_data_blocked *dest, + const uint8_t *payload, + size_t payloadlen); + +/* + * ngtcp2_pkt_decode_streams_blocked_frame decodes STREAMS_BLOCKED + * frame from |payload| of length |payloadlen|. The result is stored + * in the object pointed by |dest|. STREAMS_BLOCKED frame must start + * at payload[0]. This function finishes when it decodes one + * STREAMS_BLOCKED frame, and returns the exact number of bytes read + * to decode a frame if it succeeds, or one of the following negative + * error codes: + * + * NGTCP2_ERR_FRAME_ENCODING + * Payload is too short to include STREAMS_BLOCKED frame. + */ +ngtcp2_ssize ngtcp2_pkt_decode_streams_blocked_frame( + ngtcp2_streams_blocked *dest, const uint8_t *payload, size_t payloadlen); + +/* + * ngtcp2_pkt_decode_new_connection_id_frame decodes NEW_CONNECTION_ID + * frame from |payload| of length |payloadlen|. The result is stored + * in the object pointed by |dest|. NEW_CONNECTION_ID frame must + * start at payload[0]. This function finishes when it decodes one + * NEW_CONNECTION_ID frame, and returns the exact number of bytes read + * to decode a frame if it succeeds, or one of the following negative + * error codes: + * + * NGTCP2_ERR_FRAME_ENCODING + * Payload is too short to include NEW_CONNECTION_ID frame; or the + * length of CID is strictly less than NGTCP2_MIN_CIDLEN or + * greater than NGTCP2_MAX_CIDLEN. + */ +ngtcp2_ssize ngtcp2_pkt_decode_new_connection_id_frame( + ngtcp2_new_connection_id *dest, const uint8_t *payload, size_t payloadlen); + +/* + * ngtcp2_pkt_decode_stop_sending_frame decodes STOP_SENDING frame + * from |payload| of length |payloadlen|. The result is stored in the + * object pointed by |dest|. STOP_SENDING frame must start at + * payload[0]. This function finishes when it decodes one + * STOP_SENDING frame, and returns the exact number of bytes read to + * decode a frame if it succeeds, or one of the following negative + * error codes: + * + * NGTCP2_ERR_FRAME_ENCODING + * Payload is too short to include STOP_SENDING frame. + */ +ngtcp2_ssize ngtcp2_pkt_decode_stop_sending_frame(ngtcp2_stop_sending *dest, + const uint8_t *payload, + size_t payloadlen); + +/* + * ngtcp2_pkt_decode_path_challenge_frame decodes PATH_CHALLENGE frame + * from |payload| of length |payloadlen|. The result is stored in the + * object pointed by |dest|. PATH_CHALLENGE frame must start at + * payload[0]. This function finishes when it decodes one + * PATH_CHALLENGE frame, and returns the exact number of bytes read to + * decode a frame if it succeeds, or one of the following negative + * error codes: + * + * NGTCP2_ERR_FRAME_ENCODING + * Payload is too short to include PATH_CHALLENGE frame. + */ +ngtcp2_ssize ngtcp2_pkt_decode_path_challenge_frame(ngtcp2_path_challenge *dest, + const uint8_t *payload, + size_t payloadlen); + +/* + * ngtcp2_pkt_decode_path_response_frame decodes PATH_RESPONSE frame + * from |payload| of length |payloadlen|. The result is stored in the + * object pointed by |dest|. PATH_RESPONSE frame must start at + * payload[0]. This function finishes when it decodes one + * PATH_RESPONSE frame, and returns the exact number of bytes read to + * decode a frame if it succeeds, or one of the following negative + * error codes: + * + * NGTCP2_ERR_FRAME_ENCODING + * Payload is too short to include PATH_RESPONSE frame. + */ +ngtcp2_ssize ngtcp2_pkt_decode_path_response_frame(ngtcp2_path_response *dest, + const uint8_t *payload, + size_t payloadlen); + +/* + * ngtcp2_pkt_decode_crypto_frame decodes CRYPTO frame from |payload| + * of length |payloadlen|. The result is stored in the object pointed + * by |dest|. CRYPTO frame must start at payload[0]. This function + * finishes when it decodes one CRYPTO frame, and returns the exact + * number of bytes read to decode a frame if it succeeds, or one of + * the following negative error codes: + * + * NGTCP2_ERR_FRAME_ENCODING + * Payload is too short to include CRYPTO frame. + */ +ngtcp2_ssize ngtcp2_pkt_decode_crypto_frame(ngtcp2_crypto *dest, + const uint8_t *payload, + size_t payloadlen); + +/* + * ngtcp2_pkt_decode_new_token_frame decodes NEW_TOKEN frame from + * |payload| of length |payloadlen|. The result is stored in the + * object pointed by |dest|. NEW_TOKEN frame must start at + * payload[0]. This function finishes when it decodes one NEW_TOKEN + * frame, and returns the exact number of bytes read to decode a frame + * if it succeeds, or one of the following negative error codes: + * + * NGTCP2_ERR_FRAME_ENCODING + * Payload is too short to include NEW_TOKEN frame. + */ +ngtcp2_ssize ngtcp2_pkt_decode_new_token_frame(ngtcp2_new_token *dest, + const uint8_t *payload, + size_t payloadlen); + +/* + * ngtcp2_pkt_decode_retire_connection_id_frame decodes RETIRE_CONNECTION_ID + * frame from |payload| of length |payloadlen|. The result is stored in the + * object pointed by |dest|. RETIRE_CONNECTION_ID frame must start at + * payload[0]. This function finishes when it decodes one RETIRE_CONNECTION_ID + * frame, and returns the exact number of bytes read to decode a frame + * if it succeeds, or one of the following negative error codes: + * + * NGTCP2_ERR_FRAME_ENCODING + * Payload is too short to include RETIRE_CONNECTION_ID frame. + */ +ngtcp2_ssize +ngtcp2_pkt_decode_retire_connection_id_frame(ngtcp2_retire_connection_id *dest, + const uint8_t *payload, + size_t payloadlen); + +/* + * ngtcp2_pkt_decode_handshake_done_frame decodes HANDSHAKE_DONE frame + * from |payload| of length |payloadlen|. The result is stored in the + * object pointed by |dest|. HANDSHAKE_DONE frame must start at + * payload[0]. This function finishes when it decodes one + * HANDSHAKE_DONE frame, and returns the exact number of bytes read to + * decode a frame. + */ +ngtcp2_ssize ngtcp2_pkt_decode_handshake_done_frame(ngtcp2_handshake_done *dest, + const uint8_t *payload, + size_t payloadlen); + +/* + * ngtcp2_pkt_decode_datagram_frame decodes DATAGRAM frame from + * |payload| of length |payloadlen|. The result is stored in the + * object pointed by |dest|. DATAGRAM frame must start at payload[0]. + * This function finishes when it decodes one DATAGRAM frame, and + * returns the exact number of bytes read to decode a frame if it + * succeeds, or one of the following negative error codes: + * + * NGTCP2_ERR_FRAME_ENCODING + * Payload is too short to include DATAGRAM frame. + */ +ngtcp2_ssize ngtcp2_pkt_decode_datagram_frame(ngtcp2_datagram *dest, + const uint8_t *payload, + size_t payloadlen); + +/* + * ngtcp2_pkt_encode_stream_frame encodes STREAM frame |fr| into the + * buffer pointed by |out| of length |outlen|. + * + * This function assigns <the serialized frame type> & + * ~NGTCP2_FRAME_STREAM to fr->flags. + * + * This function returns the number of bytes written if it succeeds, + * or one of the following negative error codes: + * + * NGTCP2_ERR_NOBUF + * Buffer does not have enough capacity to write a frame. + */ +ngtcp2_ssize ngtcp2_pkt_encode_stream_frame(uint8_t *out, size_t outlen, + ngtcp2_stream *fr); + +/* + * ngtcp2_pkt_encode_ack_frame encodes ACK frame |fr| into the buffer + * pointed by |out| of length |outlen|. + * + * This function assigns <the serialized frame type> & + * ~NGTCP2_FRAME_ACK to fr->flags. + * + * This function returns the number of bytes written if it succeeds, + * or one of the following negative error codes: + * + * NGTCP2_ERR_NOBUF + * Buffer does not have enough capacity to write a frame. + */ +ngtcp2_ssize ngtcp2_pkt_encode_ack_frame(uint8_t *out, size_t outlen, + ngtcp2_ack *fr); + +/* + * ngtcp2_pkt_encode_padding_frame encodes PADDING frame |fr| into the + * buffer pointed by |out| of length |outlen|. + * + * This function encodes consecutive fr->len PADDING frames. + * + * This function returns the number of bytes written if it succeeds, + * or one of the following negative error codes: + * + * NGTCP2_ERR_NOBUF + * Buffer does not have enough capacity to write frame(s). + */ +ngtcp2_ssize ngtcp2_pkt_encode_padding_frame(uint8_t *out, size_t outlen, + const ngtcp2_padding *fr); + +/* + * ngtcp2_pkt_encode_reset_stream_frame encodes RESET_STREAM frame + * |fr| into the buffer pointed by |out| of length |buflen|. + * + * This function returns the number of bytes written if it succeeds, + * or one of the following negative error codes: + * + * NGTCP2_ERR_NOBUF + * Buffer does not have enough capacity to write a frame. + */ +ngtcp2_ssize +ngtcp2_pkt_encode_reset_stream_frame(uint8_t *out, size_t outlen, + const ngtcp2_reset_stream *fr); + +/* + * ngtcp2_pkt_encode_connection_close_frame encodes CONNECTION_CLOSE + * frame |fr| into the buffer pointed by |out| of length |outlen|. + * + * This function returns the number of bytes written if it succeeds, + * or one of the following negative error codes: + * + * NGTCP2_ERR_NOBUF + * Buffer does not have enough capacity to write a frame. + */ +ngtcp2_ssize +ngtcp2_pkt_encode_connection_close_frame(uint8_t *out, size_t outlen, + const ngtcp2_connection_close *fr); + +/* + * ngtcp2_pkt_encode_max_data_frame encodes MAX_DATA frame |fr| into + * the buffer pointed by |out| of length |outlen|. + * + * This function returns the number of bytes written if it succeeds, + * or one of the following negative error codes: + * + * NGTCP2_ERR_NOBUF + * Buffer does not have enough capacity to write a frame. + */ +ngtcp2_ssize ngtcp2_pkt_encode_max_data_frame(uint8_t *out, size_t outlen, + const ngtcp2_max_data *fr); + +/* + * ngtcp2_pkt_encode_max_stream_data_frame encodes MAX_STREAM_DATA + * frame |fr| into the buffer pointed by |out| of length |outlen|. + * + * This function returns the number of bytes written if it succeeds, + * or one of the following negative error codes: + * + * NGTCP2_ERR_NOBUF + * Buffer does not have enough capacity to write a frame. + */ +ngtcp2_ssize +ngtcp2_pkt_encode_max_stream_data_frame(uint8_t *out, size_t outlen, + const ngtcp2_max_stream_data *fr); + +/* + * ngtcp2_pkt_encode_max_streams_frame encodes MAX_STREAMS + * frame |fr| into the buffer pointed by |out| of length |outlen|. + * + * This function returns the number of bytes written if it succeeds, + * or one of the following negative error codes: + * + * NGTCP2_ERR_NOBUF + * Buffer does not have enough capacity to write a frame. + */ +ngtcp2_ssize ngtcp2_pkt_encode_max_streams_frame(uint8_t *out, size_t outlen, + const ngtcp2_max_streams *fr); + +/* + * ngtcp2_pkt_encode_ping_frame encodes PING frame |fr| into the + * buffer pointed by |out| of length |outlen|. + * + * This function returns the number of bytes written if it succeeds, + * or one of the following negative error codes: + * + * NGTCP2_ERR_NOBUF + * Buffer does not have enough capacity to write a frame. + */ +ngtcp2_ssize ngtcp2_pkt_encode_ping_frame(uint8_t *out, size_t outlen, + const ngtcp2_ping *fr); + +/* + * ngtcp2_pkt_encode_data_blocked_frame encodes DATA_BLOCKED frame + * |fr| into the buffer pointed by |out| of length |outlen|. + * + * This function returns the number of bytes written if it succeeds, + * or one of the following negative error codes: + * + * NGTCP2_ERR_NOBUF + * Buffer does not have enough capacity to write a frame. + */ +ngtcp2_ssize +ngtcp2_pkt_encode_data_blocked_frame(uint8_t *out, size_t outlen, + const ngtcp2_data_blocked *fr); + +/* + * ngtcp2_pkt_encode_stream_data_blocked_frame encodes + * STREAM_DATA_BLOCKED frame |fr| into the buffer pointed by |out| of + * length |outlen|. + * + * This function returns the number of bytes written if it succeeds, + * or one of the following negative error codes: + * + * NGTCP2_ERR_NOBUF + * Buffer does not have enough capacity to write a frame. + */ +ngtcp2_ssize ngtcp2_pkt_encode_stream_data_blocked_frame( + uint8_t *out, size_t outlen, const ngtcp2_stream_data_blocked *fr); + +/* + * ngtcp2_pkt_encode_streams_blocked_frame encodes STREAMS_BLOCKED + * frame |fr| into the buffer pointed by |out| of length |outlen|. + * + * This function returns the number of bytes written if it succeeds, + * or one of the following negative error codes: + * + * NGTCP2_ERR_NOBUF + * Buffer does not have enough capacity to write a frame. + */ +ngtcp2_ssize +ngtcp2_pkt_encode_streams_blocked_frame(uint8_t *out, size_t outlen, + const ngtcp2_streams_blocked *fr); + +/* + * ngtcp2_pkt_encode_new_connection_id_frame encodes NEW_CONNECTION_ID + * frame |fr| into the buffer pointed by |out| of length |outlen|. + * + * This function returns the number of bytes written if it succeeds, + * or one of the following negative error codes: + * + * NGTCP2_ERR_NOBUF + * Buffer does not have enough capacity to write a frame. + */ +ngtcp2_ssize +ngtcp2_pkt_encode_new_connection_id_frame(uint8_t *out, size_t outlen, + const ngtcp2_new_connection_id *fr); + +/* + * ngtcp2_pkt_encode_stop_sending_frame encodes STOP_SENDING frame + * |fr| into the buffer pointed by |out| of length |outlen|. + * + * This function returns the number of bytes written if it succeeds, + * or one of the following negative error codes: + * + * NGTCP2_ERR_NOBUF + * Buffer does not have enough capacity to write a frame. + */ +ngtcp2_ssize +ngtcp2_pkt_encode_stop_sending_frame(uint8_t *out, size_t outlen, + const ngtcp2_stop_sending *fr); + +/* + * ngtcp2_pkt_encode_path_challenge_frame encodes PATH_CHALLENGE frame + * |fr| into the buffer pointed by |out| of length |outlen|. + * + * This function returns the number of bytes written if it succeeds, + * or one of the following negative error codes: + * + * NGTCP2_ERR_NOBUF + * Buffer does not have enough capacity to write a frame. + */ +ngtcp2_ssize +ngtcp2_pkt_encode_path_challenge_frame(uint8_t *out, size_t outlen, + const ngtcp2_path_challenge *fr); + +/* + * ngtcp2_pkt_encode_path_response_frame encodes PATH_RESPONSE frame + * |fr| into the buffer pointed by |out| of length |outlen|. + * + * This function returns the number of bytes written if it succeeds, + * or one of the following negative error codes: + * + * NGTCP2_ERR_NOBUF + * Buffer does not have enough capacity to write a frame. + */ +ngtcp2_ssize +ngtcp2_pkt_encode_path_response_frame(uint8_t *out, size_t outlen, + const ngtcp2_path_response *fr); + +/* + * ngtcp2_pkt_encode_crypto_frame encodes CRYPTO frame |fr| into the + * buffer pointed by |out| of length |outlen|. + * + * This function returns the number of bytes written if it succeeds, + * or one of the following negative error codes: + * + * NGTCP2_ERR_NOBUF + * Buffer does not have enough capacity to write a frame. + */ +ngtcp2_ssize ngtcp2_pkt_encode_crypto_frame(uint8_t *out, size_t outlen, + const ngtcp2_crypto *fr); + +/* + * ngtcp2_pkt_encode_new_token_frame encodes NEW_TOKEN frame |fr| into + * the buffer pointed by |out| of length |outlen|. + * + * This function returns the number of bytes written if it succeeds, + * or one of the following negative error codes: + * + * NGTCP2_ERR_NOBUF + * Buffer does not have enough capacity to write a frame. + */ +ngtcp2_ssize ngtcp2_pkt_encode_new_token_frame(uint8_t *out, size_t outlen, + const ngtcp2_new_token *fr); + +/* + * ngtcp2_pkt_encode_retire_connection_id_frame encodes RETIRE_CONNECTION_ID + * frame |fr| into the buffer pointed by |out| of length |outlen|. + * + * This function returns the number of bytes written if it succeeds, + * or one of the following negative error codes: + * + * NGTCP2_ERR_NOBUF + * Buffer does not have enough capacity to write a frame. + */ +ngtcp2_ssize ngtcp2_pkt_encode_retire_connection_id_frame( + uint8_t *out, size_t outlen, const ngtcp2_retire_connection_id *fr); + +/* + * ngtcp2_pkt_encode_handshake_done_frame encodes HANDSHAKE_DONE frame + * |fr| into the buffer pointed by |out| of length |outlen|. + * + * This function returns the number of bytes written if it succeeds, + * or one of the following negative error codes: + * + * NGTCP2_ERR_NOBUF + * Buffer does not have enough capacity to write a frame. + */ +ngtcp2_ssize +ngtcp2_pkt_encode_handshake_done_frame(uint8_t *out, size_t outlen, + const ngtcp2_handshake_done *fr); + +/* + * ngtcp2_pkt_encode_datagram_frame encodes DATAGRAM frame |fr| into + * the buffer pointed by |out| of length |outlen|. + * + * This function returns the number of bytes written if it succeeds, + * or one of the following negative error codes: + * + * NGTCP2_ERR_NOBUF + * Buffer does not have enough capacity to write a frame. + */ +ngtcp2_ssize ngtcp2_pkt_encode_datagram_frame(uint8_t *out, size_t outlen, + const ngtcp2_datagram *fr); + +/* + * ngtcp2_pkt_adjust_pkt_num find the full 64 bits packet number for + * |pkt_num|, which is expected to be least significant |n| bits. The + * |max_pkt_num| is the highest successfully authenticated packet + * number. + */ +int64_t ngtcp2_pkt_adjust_pkt_num(int64_t max_pkt_num, int64_t pkt_num, + size_t n); + +/* + * ngtcp2_pkt_validate_ack checks that ack is malformed or not. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_ACK_FRAME + * ACK frame is malformed + */ +int ngtcp2_pkt_validate_ack(ngtcp2_ack *fr); + +/* + * ngtcp2_pkt_stream_max_datalen returns the maximum number of bytes + * which can be sent for stream denoted by |stream_id|. |offset| is + * an offset of within the stream. |len| is the estimated number of + * bytes to be sent. |left| is the size of buffer. If |left| is too + * small to write STREAM frame, this function returns (size_t)-1. + */ +size_t ngtcp2_pkt_stream_max_datalen(int64_t stream_id, uint64_t offset, + uint64_t len, size_t left); + +/* + * ngtcp2_pkt_crypto_max_datalen returns the maximum number of bytes + * which can be sent for crypto stream. |offset| is an offset of + * within the crypto stream. |len| is the estimated number of bytes + * to be sent. |left| is the size of buffer. If |left| is too small + * to write CRYPTO frame, this function returns (size_t)-1. + */ +size_t ngtcp2_pkt_crypto_max_datalen(uint64_t offset, size_t len, size_t left); + +/* + * ngtcp2_pkt_datagram_framelen returns the length of DATAGRAM frame + * to encode |len| bytes of data. + */ +size_t ngtcp2_pkt_datagram_framelen(size_t len); + +/* + * ngtcp2_pkt_verify_reserved_bits verifies that the first byte |c| of + * the packet header has the correct reserved bits. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_PROTO + * Reserved bits has wrong value. + */ +int ngtcp2_pkt_verify_reserved_bits(uint8_t c); + +/* + * ngtcp2_pkt_encode_pseudo_retry encodes Retry pseudo-packet in the + * buffer pointed by |dest| of length |destlen|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_BUF + * Buffer is too short. + */ +ngtcp2_ssize ngtcp2_pkt_encode_pseudo_retry( + uint8_t *dest, size_t destlen, const ngtcp2_pkt_hd *hd, uint8_t unused, + const ngtcp2_cid *odcid, const uint8_t *token, size_t tokenlen); + +/* + * ngtcp2_pkt_verify_retry_tag verifies Retry packet. The buffer + * pointed by |pkt| of length |pktlen| must contain Retry packet + * including packet header. The odcid and tag fields of |retry| must + * be specified. |aead| must be AEAD_AES_128_GCM. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_PROTO + * Verification failed. + */ +int ngtcp2_pkt_verify_retry_tag(uint32_t version, const ngtcp2_pkt_retry *retry, + const uint8_t *pkt, size_t pktlen, + ngtcp2_encrypt encrypt, + const ngtcp2_crypto_aead *aead, + const ngtcp2_crypto_aead_ctx *aead_ctx); + +/* + * ngtcp2_pkt_versioned_type returns versioned packet type for a + * version |version| that corresponds to the version-independent + * |pkt_type|. + */ +uint8_t ngtcp2_pkt_versioned_type(uint32_t version, uint32_t pkt_type); + +/** + * @function + * + * `ngtcp2_pkt_get_type_long` returns the version-independent long + * packet type. |version| is the QUIC version. |c| is the first byte + * of Long packet header. If |version| is not supported by the + * library, it returns 0. + */ +uint8_t ngtcp2_pkt_get_type_long(uint32_t version, uint8_t c); + +#endif /* NGTCP2_PKT_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pmtud.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pmtud.c new file mode 100644 index 0000000..771ef5e --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pmtud.c @@ -0,0 +1,160 @@ +/* + * ngtcp2 + * + * Copyright (c) 2022 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_pmtud.h" + +#include <assert.h> + +#include "ngtcp2_mem.h" +#include "ngtcp2_macro.h" + +/* NGTCP2_PMTUD_PROBE_NUM_MAX is the maximum number of packets sent + for each probe. */ +#define NGTCP2_PMTUD_PROBE_NUM_MAX 3 + +static size_t mtu_probes[] = { + 1454 - 48, /* The well known MTU used by a domestic optic fiber + service in Japan. */ + 1390 - 48, /* Typical Tunneled MTU */ + 1280 - 48, /* IPv6 minimum MTU */ + 1492 - 48, /* PPPoE */ +}; + +#define NGTCP2_MTU_PROBESLEN ngtcp2_arraylen(mtu_probes) + +int ngtcp2_pmtud_new(ngtcp2_pmtud **ppmtud, size_t max_udp_payload_size, + size_t hard_max_udp_payload_size, int64_t tx_pkt_num, + const ngtcp2_mem *mem) { + ngtcp2_pmtud *pmtud = ngtcp2_mem_malloc(mem, sizeof(ngtcp2_pmtud)); + + if (pmtud == NULL) { + return NGTCP2_ERR_NOMEM; + } + + pmtud->mem = mem; + pmtud->mtu_idx = 0; + pmtud->num_pkts_sent = 0; + pmtud->expiry = UINT64_MAX; + pmtud->tx_pkt_num = tx_pkt_num; + pmtud->max_udp_payload_size = max_udp_payload_size; + pmtud->hard_max_udp_payload_size = hard_max_udp_payload_size; + pmtud->min_fail_udp_payload_size = SIZE_MAX; + + for (; pmtud->mtu_idx < NGTCP2_MTU_PROBESLEN; ++pmtud->mtu_idx) { + if (mtu_probes[pmtud->mtu_idx] > pmtud->hard_max_udp_payload_size) { + continue; + } + if (mtu_probes[pmtud->mtu_idx] > pmtud->max_udp_payload_size) { + break; + } + } + + *ppmtud = pmtud; + + return 0; +} + +void ngtcp2_pmtud_del(ngtcp2_pmtud *pmtud) { + if (!pmtud) { + return; + } + + ngtcp2_mem_free(pmtud->mem, pmtud); +} + +size_t ngtcp2_pmtud_probelen(ngtcp2_pmtud *pmtud) { + assert(pmtud->mtu_idx < NGTCP2_MTU_PROBESLEN); + + return mtu_probes[pmtud->mtu_idx]; +} + +void ngtcp2_pmtud_probe_sent(ngtcp2_pmtud *pmtud, ngtcp2_duration pto, + ngtcp2_tstamp ts) { + ngtcp2_tstamp timeout; + + if (++pmtud->num_pkts_sent < NGTCP2_PMTUD_PROBE_NUM_MAX) { + timeout = pto; + } else { + timeout = 3 * pto; + } + + pmtud->expiry = ts + timeout; +} + +int ngtcp2_pmtud_require_probe(ngtcp2_pmtud *pmtud) { + return pmtud->expiry == UINT64_MAX; +} + +static void pmtud_next_probe(ngtcp2_pmtud *pmtud) { + assert(pmtud->mtu_idx < NGTCP2_MTU_PROBESLEN); + + ++pmtud->mtu_idx; + pmtud->num_pkts_sent = 0; + pmtud->expiry = UINT64_MAX; + + for (; pmtud->mtu_idx < NGTCP2_MTU_PROBESLEN; ++pmtud->mtu_idx) { + if (mtu_probes[pmtud->mtu_idx] <= pmtud->max_udp_payload_size || + mtu_probes[pmtud->mtu_idx] > pmtud->hard_max_udp_payload_size) { + continue; + } + + if (mtu_probes[pmtud->mtu_idx] < pmtud->min_fail_udp_payload_size) { + break; + } + } +} + +void ngtcp2_pmtud_probe_success(ngtcp2_pmtud *pmtud, size_t payloadlen) { + pmtud->max_udp_payload_size = + ngtcp2_max(pmtud->max_udp_payload_size, payloadlen); + + assert(pmtud->mtu_idx < NGTCP2_MTU_PROBESLEN); + + if (mtu_probes[pmtud->mtu_idx] > pmtud->max_udp_payload_size) { + return; + } + + pmtud_next_probe(pmtud); +} + +void ngtcp2_pmtud_handle_expiry(ngtcp2_pmtud *pmtud, ngtcp2_tstamp ts) { + if (ts < pmtud->expiry) { + return; + } + + pmtud->expiry = UINT64_MAX; + + if (pmtud->num_pkts_sent < NGTCP2_PMTUD_PROBE_NUM_MAX) { + return; + } + + pmtud->min_fail_udp_payload_size = + ngtcp2_min(pmtud->min_fail_udp_payload_size, mtu_probes[pmtud->mtu_idx]); + + pmtud_next_probe(pmtud); +} + +int ngtcp2_pmtud_finished(ngtcp2_pmtud *pmtud) { + return pmtud->mtu_idx >= NGTCP2_MTU_PROBESLEN; +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pmtud.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pmtud.h new file mode 100644 index 0000000..6b2e691 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pmtud.h @@ -0,0 +1,123 @@ +/* + * ngtcp2 + * + * Copyright (c) 2022 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_PMTUD_H +#define NGTCP2_PMTUD_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +typedef struct ngtcp2_pmtud { + const ngtcp2_mem *mem; + /* mtu_idx is the index of UDP payload size candidates to try + out. */ + size_t mtu_idx; + /* num_pkts_sent is the number of mtu_idx sized UDP datagram payload + sent */ + size_t num_pkts_sent; + /* expiry is the expired, if it is reached, send out the next UDP + datagram. UINT64_MAX means no expiry, or expiration is canceled. + In either case, new probe packet should be sent unless we have + done all attempts. */ + ngtcp2_tstamp expiry; + /* tx_pkt_num is the smallest outgoing packet number where the + current discovery is performed. In other words, acknowledging + packet whose packet number lower than that does not indicate the + success of Path MTU Discovery. */ + int64_t tx_pkt_num; + /* max_udp_payload_size is the maximum UDP payload size which is + known to work. */ + size_t max_udp_payload_size; + /* hard_max_udp_payload_size is the maximum UDP payload size that is + going to be probed. */ + size_t hard_max_udp_payload_size; + /* min_fail_udp_payload_size is the minimum UDP payload size that is + known to fail. */ + size_t min_fail_udp_payload_size; +} ngtcp2_pmtud; + +/* + * ngtcp2_pmtud_new creates new ngtcp2_pmtud object, and assigns its + * pointer to |*ppmtud|. |max_udp_payload_size| is the maximum UDP + * payload size that is known to work for the current path. + * |tx_pkt_num| should be the next packet number to send, which is + * used to differentiate the PMTUD probe packet sent by the previous + * PMTUD. PMTUD might finish immediately if |max_udp_payload_size| is + * larger than or equal to all UDP payload probe candidates. + * Therefore, call ngtcp2_pmtud_finished to check this situation. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +int ngtcp2_pmtud_new(ngtcp2_pmtud **ppmtud, size_t max_udp_payload_size, + size_t hard_max_udp_payload_size, int64_t tx_pkt_num, + const ngtcp2_mem *mem); + +/* + * ngtcp2_pmtud_del deletes |pmtud|. + */ +void ngtcp2_pmtud_del(ngtcp2_pmtud *pmtud); + +/* + * ngtcp2_pmtud_probelen returns the length of UDP payload size for a + * PMTUD probe packet. + */ +size_t ngtcp2_pmtud_probelen(ngtcp2_pmtud *pmtud); + +/* + * ngtcp2_pmtud_probe_sent should be invoked when a PMTUD probe packet is + * sent. + */ +void ngtcp2_pmtud_probe_sent(ngtcp2_pmtud *pmtud, ngtcp2_duration pto, + ngtcp2_tstamp ts); + +/* + * ngtcp2_pmtud_require_probe returns nonzero if a PMTUD probe packet + * should be sent. + */ +int ngtcp2_pmtud_require_probe(ngtcp2_pmtud *pmtud); + +/* + * ngtcp2_pmtud_probe_success should be invoked when a PMTUD probe + * UDP datagram sized |payloadlen| is acknowledged. + */ +void ngtcp2_pmtud_probe_success(ngtcp2_pmtud *pmtud, size_t payloadlen); + +/* + * ngtcp2_pmtud_handle_expiry handles expiry. + */ +void ngtcp2_pmtud_handle_expiry(ngtcp2_pmtud *pmtud, ngtcp2_tstamp ts); + +/* + * ngtcp2_pmtud_finished returns nonzero if PMTUD has finished. + */ +int ngtcp2_pmtud_finished(ngtcp2_pmtud *pmtud); + +#endif /* NGTCP2_PMTUD_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ppe.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ppe.c new file mode 100644 index 0000000..ffba131 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ppe.c @@ -0,0 +1,230 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_ppe.h" + +#include <string.h> +#include <assert.h> + +#include "ngtcp2_str.h" +#include "ngtcp2_conv.h" + +void ngtcp2_ppe_init(ngtcp2_ppe *ppe, uint8_t *out, size_t outlen, + ngtcp2_crypto_cc *cc) { + ngtcp2_buf_init(&ppe->buf, out, outlen); + + ppe->hdlen = 0; + ppe->len_offset = 0; + ppe->pkt_num_offset = 0; + ppe->pkt_numlen = 0; + ppe->pkt_num = 0; + ppe->sample_offset = 0; + ppe->cc = cc; +} + +int ngtcp2_ppe_encode_hd(ngtcp2_ppe *ppe, const ngtcp2_pkt_hd *hd) { + ngtcp2_ssize rv; + ngtcp2_buf *buf = &ppe->buf; + ngtcp2_crypto_cc *cc = ppe->cc; + + if (ngtcp2_buf_left(buf) < cc->aead.max_overhead) { + return NGTCP2_ERR_NOBUF; + } + + if (hd->flags & NGTCP2_PKT_FLAG_LONG_FORM) { + ppe->len_offset = 1 + 4 + 1 + hd->dcid.datalen + 1 + hd->scid.datalen; + if (hd->type == NGTCP2_PKT_INITIAL) { + ppe->len_offset += ngtcp2_put_uvarintlen(hd->tokenlen) + hd->tokenlen; + } + ppe->pkt_num_offset = ppe->len_offset + NGTCP2_PKT_LENGTHLEN; + rv = ngtcp2_pkt_encode_hd_long( + buf->last, ngtcp2_buf_left(buf) - cc->aead.max_overhead, hd); + } else { + ppe->pkt_num_offset = 1 + hd->dcid.datalen; + rv = ngtcp2_pkt_encode_hd_short( + buf->last, ngtcp2_buf_left(buf) - cc->aead.max_overhead, hd); + } + if (rv < 0) { + return (int)rv; + } + + ppe->sample_offset = ppe->pkt_num_offset + 4; + + buf->last += rv; + + ppe->pkt_numlen = hd->pkt_numlen; + ppe->hdlen = (size_t)rv; + + ppe->pkt_num = hd->pkt_num; + + return 0; +} + +int ngtcp2_ppe_encode_frame(ngtcp2_ppe *ppe, ngtcp2_frame *fr) { + ngtcp2_ssize rv; + ngtcp2_buf *buf = &ppe->buf; + ngtcp2_crypto_cc *cc = ppe->cc; + + if (ngtcp2_buf_left(buf) < cc->aead.max_overhead) { + return NGTCP2_ERR_NOBUF; + } + + rv = ngtcp2_pkt_encode_frame( + buf->last, ngtcp2_buf_left(buf) - cc->aead.max_overhead, fr); + if (rv < 0) { + return (int)rv; + } + + buf->last += rv; + + return 0; +} + +ngtcp2_ssize ngtcp2_ppe_final(ngtcp2_ppe *ppe, const uint8_t **ppkt) { + ngtcp2_buf *buf = &ppe->buf; + ngtcp2_crypto_cc *cc = ppe->cc; + uint8_t *payload = buf->begin + ppe->hdlen; + size_t payloadlen = ngtcp2_buf_len(buf) - ppe->hdlen; + uint8_t mask[NGTCP2_HP_SAMPLELEN]; + uint8_t *p; + size_t i; + int rv; + + assert(cc->encrypt); + assert(cc->hp_mask); + + if (ppe->len_offset) { + ngtcp2_put_uvarint30( + buf->begin + ppe->len_offset, + (uint16_t)(payloadlen + ppe->pkt_numlen + cc->aead.max_overhead)); + } + + ngtcp2_crypto_create_nonce(ppe->nonce, cc->ckm->iv.base, cc->ckm->iv.len, + ppe->pkt_num); + + rv = cc->encrypt(payload, &cc->aead, &cc->ckm->aead_ctx, payload, payloadlen, + ppe->nonce, cc->ckm->iv.len, buf->begin, ppe->hdlen); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + buf->last = payload + payloadlen + cc->aead.max_overhead; + + /* TODO Check that we have enough space to get sample */ + assert(ppe->sample_offset + NGTCP2_HP_SAMPLELEN <= ngtcp2_buf_len(buf)); + + rv = cc->hp_mask(mask, &cc->hp, &cc->hp_ctx, buf->begin + ppe->sample_offset); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + p = buf->begin; + if (*p & NGTCP2_HEADER_FORM_BIT) { + *p = (uint8_t)(*p ^ (mask[0] & 0x0f)); + } else { + *p = (uint8_t)(*p ^ (mask[0] & 0x1f)); + } + + p = buf->begin + ppe->pkt_num_offset; + for (i = 0; i < ppe->pkt_numlen; ++i) { + *(p + i) ^= mask[i + 1]; + } + + if (ppkt != NULL) { + *ppkt = buf->begin; + } + + return (ngtcp2_ssize)ngtcp2_buf_len(buf); +} + +size_t ngtcp2_ppe_left(ngtcp2_ppe *ppe) { + ngtcp2_crypto_cc *cc = ppe->cc; + + if (ngtcp2_buf_left(&ppe->buf) < cc->aead.max_overhead) { + return 0; + } + + return ngtcp2_buf_left(&ppe->buf) - cc->aead.max_overhead; +} + +size_t ngtcp2_ppe_pktlen(ngtcp2_ppe *ppe) { + ngtcp2_crypto_cc *cc = ppe->cc; + + return ngtcp2_buf_len(&ppe->buf) + cc->aead.max_overhead; +} + +size_t ngtcp2_ppe_padding(ngtcp2_ppe *ppe) { + ngtcp2_crypto_cc *cc = ppe->cc; + ngtcp2_buf *buf = &ppe->buf; + size_t len; + + assert(ngtcp2_buf_left(buf) >= cc->aead.max_overhead); + + len = ngtcp2_buf_left(buf) - cc->aead.max_overhead; + memset(buf->last, 0, len); + buf->last += len; + + return len; +} + +size_t ngtcp2_ppe_padding_hp_sample(ngtcp2_ppe *ppe) { + ngtcp2_crypto_cc *cc = ppe->cc; + ngtcp2_buf *buf = &ppe->buf; + size_t max_samplelen; + size_t len = 0; + + assert(cc->aead.max_overhead); + + max_samplelen = + ngtcp2_buf_len(buf) + cc->aead.max_overhead - ppe->sample_offset; + if (max_samplelen < NGTCP2_HP_SAMPLELEN) { + len = NGTCP2_HP_SAMPLELEN - max_samplelen; + assert(ngtcp2_ppe_left(ppe) >= len); + memset(buf->last, 0, len); + buf->last += len; + } + + return len; +} + +size_t ngtcp2_ppe_padding_size(ngtcp2_ppe *ppe, size_t n) { + ngtcp2_crypto_cc *cc = ppe->cc; + ngtcp2_buf *buf = &ppe->buf; + size_t pktlen = ngtcp2_buf_len(buf) + cc->aead.max_overhead; + size_t len; + + if (pktlen >= n) { + return 0; + } + + len = n - pktlen; + buf->last = ngtcp2_setmem(buf->last, 0, len); + + return len; +} + +int ngtcp2_ppe_ensure_hp_sample(ngtcp2_ppe *ppe) { + ngtcp2_buf *buf = &ppe->buf; + return ngtcp2_buf_left(buf) >= (4 - ppe->pkt_numlen) + NGTCP2_HP_SAMPLELEN; +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ppe.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ppe.h new file mode 100644 index 0000000..bf220df --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ppe.h @@ -0,0 +1,153 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_PPE_H +#define NGTCP2_PPE_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +#include "ngtcp2_pkt.h" +#include "ngtcp2_buf.h" +#include "ngtcp2_crypto.h" + +/* + * ngtcp2_ppe is the Protected Packet Encoder. + */ +typedef struct ngtcp2_ppe { + ngtcp2_buf buf; + ngtcp2_crypto_cc *cc; + /* hdlen is the number of bytes for packet header written in buf. */ + size_t hdlen; + /* len_offset is the offset to Length field. */ + size_t len_offset; + /* pkt_num_offset is the offset to packet number field. */ + size_t pkt_num_offset; + /* pkt_numlen is the number of bytes used to encode a packet + number */ + size_t pkt_numlen; + /* sample_offset is the offset to sample for packet number + encryption. */ + size_t sample_offset; + /* pkt_num is the packet number written in buf. */ + int64_t pkt_num; + /* nonce is the buffer to store nonce. It should be equal or longer + than then length of IV. */ + uint8_t nonce[32]; +} ngtcp2_ppe; + +/* + * ngtcp2_ppe_init initializes |ppe| with the given buffer. + */ +void ngtcp2_ppe_init(ngtcp2_ppe *ppe, uint8_t *out, size_t outlen, + ngtcp2_crypto_cc *cc); + +/* + * ngtcp2_ppe_encode_hd encodes |hd|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOBUF + * The buffer is too small. + */ +int ngtcp2_ppe_encode_hd(ngtcp2_ppe *ppe, const ngtcp2_pkt_hd *hd); + +/* + * ngtcp2_ppe_encode_frame encodes |fr|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOBUF + * The buffer is too small. + */ +int ngtcp2_ppe_encode_frame(ngtcp2_ppe *ppe, ngtcp2_frame *fr); + +/* + * ngtcp2_ppe_final encrypts QUIC packet payload. If |**ppkt| is not + * NULL, the pointer to the packet is assigned to it. + * + * This function returns the length of QUIC packet, including header, + * and payload if it succeeds, or one of the following negative error + * codes: + * + * NGTCP2_ERR_CALLBACK_FAILURE + * User-defined callback function failed. + */ +ngtcp2_ssize ngtcp2_ppe_final(ngtcp2_ppe *ppe, const uint8_t **ppkt); + +/* + * ngtcp2_ppe_left returns the number of bytes left to write + * additional frames. This does not count AEAD overhead. + */ +size_t ngtcp2_ppe_left(ngtcp2_ppe *ppe); + +/* + * ngtcp2_ppe_pktlen returns the provisional packet length. It + * includes AEAD overhead. + */ +size_t ngtcp2_ppe_pktlen(ngtcp2_ppe *ppe); + +/** + * @function + * + * `ngtcp2_ppe_padding` encodes PADDING frames to the end of the + * buffer. This function returns the number of bytes padded. + */ +size_t ngtcp2_ppe_padding(ngtcp2_ppe *ppe); + +/* + * ngtcp2_ppe_padding_hp_sample adds PADDING frame if the current + * payload does not have enough space for header protection sample. + * This function should be called just before calling + * ngtcp2_ppe_final(). + * + * This function returns the number of bytes added as padding. + */ +size_t ngtcp2_ppe_padding_hp_sample(ngtcp2_ppe *ppe); + +/* + * ngtcp2_ppe_padding_size adds PADDING frame so that the size of QUIC + * packet is at least |n| bytes long. If it is unable to add PADDING + * in that way, this function still adds PADDING frame as much as + * possible. This function should be called just before calling + * ngtcp2_ppe_final(). For Short packet, this function should be + * called instead of ngtcp2_ppe_padding_hp_sample. + * + * This function returns the number of bytes added as padding. + */ +size_t ngtcp2_ppe_padding_size(ngtcp2_ppe *ppe, size_t n); + +/* + * ngtcp2_ppe_ensure_hp_sample returns nonzero if the buffer has + * enough space for header protection sample. This should be called + * right after packet header is written. + */ +int ngtcp2_ppe_ensure_hp_sample(ngtcp2_ppe *ppe); + +#endif /* NGTCP2_PPE_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pq.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pq.c new file mode 100644 index 0000000..5e1003d --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pq.c @@ -0,0 +1,164 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * Copyright (c) 2012 nghttp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_pq.h" + +#include <assert.h> + +#include "ngtcp2_macro.h" + +void ngtcp2_pq_init(ngtcp2_pq *pq, ngtcp2_less less, const ngtcp2_mem *mem) { + pq->mem = mem; + pq->capacity = 0; + pq->q = NULL; + pq->length = 0; + pq->less = less; +} + +void ngtcp2_pq_free(ngtcp2_pq *pq) { + ngtcp2_mem_free(pq->mem, pq->q); + pq->q = NULL; +} + +static void swap(ngtcp2_pq *pq, size_t i, size_t j) { + ngtcp2_pq_entry *a = pq->q[i]; + ngtcp2_pq_entry *b = pq->q[j]; + + pq->q[i] = b; + b->index = i; + pq->q[j] = a; + a->index = j; +} + +static void bubble_up(ngtcp2_pq *pq, size_t index) { + size_t parent; + while (index != 0) { + parent = (index - 1) / 2; + if (!pq->less(pq->q[index], pq->q[parent])) { + return; + } + swap(pq, parent, index); + index = parent; + } +} + +int ngtcp2_pq_push(ngtcp2_pq *pq, ngtcp2_pq_entry *item) { + if (pq->capacity <= pq->length) { + void *nq; + size_t ncapacity; + + ncapacity = ngtcp2_max(4, (pq->capacity * 2)); + + nq = ngtcp2_mem_realloc(pq->mem, pq->q, + ncapacity * sizeof(ngtcp2_pq_entry *)); + if (nq == NULL) { + return NGTCP2_ERR_NOMEM; + } + pq->capacity = ncapacity; + pq->q = nq; + } + pq->q[pq->length] = item; + item->index = pq->length; + ++pq->length; + bubble_up(pq, pq->length - 1); + return 0; +} + +ngtcp2_pq_entry *ngtcp2_pq_top(ngtcp2_pq *pq) { + assert(pq->length); + return pq->q[0]; +} + +static void bubble_down(ngtcp2_pq *pq, size_t index) { + size_t i, j, minindex; + for (;;) { + j = index * 2 + 1; + minindex = index; + for (i = 0; i < 2; ++i, ++j) { + if (j >= pq->length) { + break; + } + if (pq->less(pq->q[j], pq->q[minindex])) { + minindex = j; + } + } + if (minindex == index) { + return; + } + swap(pq, index, minindex); + index = minindex; + } +} + +void ngtcp2_pq_pop(ngtcp2_pq *pq) { + if (pq->length > 0) { + pq->q[0] = pq->q[pq->length - 1]; + pq->q[0]->index = 0; + --pq->length; + bubble_down(pq, 0); + } +} + +void ngtcp2_pq_remove(ngtcp2_pq *pq, ngtcp2_pq_entry *item) { + assert(pq->q[item->index] == item); + + if (item->index == 0) { + ngtcp2_pq_pop(pq); + return; + } + + if (item->index == pq->length - 1) { + --pq->length; + return; + } + + pq->q[item->index] = pq->q[pq->length - 1]; + pq->q[item->index]->index = item->index; + --pq->length; + + if (pq->less(item, pq->q[item->index])) { + bubble_down(pq, item->index); + } else { + bubble_up(pq, item->index); + } +} + +int ngtcp2_pq_empty(ngtcp2_pq *pq) { return pq->length == 0; } + +size_t ngtcp2_pq_size(ngtcp2_pq *pq) { return pq->length; } + +int ngtcp2_pq_each(ngtcp2_pq *pq, ngtcp2_pq_item_cb fun, void *arg) { + size_t i; + + if (pq->length == 0) { + return 0; + } + for (i = 0; i < pq->length; ++i) { + if ((*fun)(pq->q[i], arg)) { + return 1; + } + } + return 0; +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pq.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pq.h new file mode 100644 index 0000000..720c309 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pq.h @@ -0,0 +1,126 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * Copyright (c) 2012 nghttp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_PQ_H +#define NGTCP2_PQ_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +#include "ngtcp2_mem.h" + +/* Implementation of priority queue */ + +/* NGTCP2_PQ_BAD_INDEX is the priority queue index which indicates + that an entry is not queued. Assigning this value to + ngtcp2_pq_entry.index can check that the entry is queued or not. */ +#define NGTCP2_PQ_BAD_INDEX SIZE_MAX + +typedef struct ngtcp2_pq_entry { + size_t index; +} ngtcp2_pq_entry; + +/* "less" function, return nonzero if |lhs| is less than |rhs|. */ +typedef int (*ngtcp2_less)(const ngtcp2_pq_entry *lhs, + const ngtcp2_pq_entry *rhs); + +typedef struct ngtcp2_pq { + /* The pointer to the pointer to the item stored */ + ngtcp2_pq_entry **q; + /* Memory allocator */ + const ngtcp2_mem *mem; + /* The number of items stored */ + size_t length; + /* The maximum number of items this pq can store. This is + automatically extended when length is reached to this value. */ + size_t capacity; + /* The less function between items */ + ngtcp2_less less; +} ngtcp2_pq; + +/* + * Initializes priority queue |pq| with compare function |cmp|. + */ +void ngtcp2_pq_init(ngtcp2_pq *pq, ngtcp2_less less, const ngtcp2_mem *mem); + +/* + * Deallocates any resources allocated for |pq|. The stored items are + * not freed by this function. + */ +void ngtcp2_pq_free(ngtcp2_pq *pq); + +/* + * Adds |item| to the priority queue |pq|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +int ngtcp2_pq_push(ngtcp2_pq *pq, ngtcp2_pq_entry *item); + +/* + * Returns item at the top of the queue |pq|. It is undefined if the + * queue is empty. + */ +ngtcp2_pq_entry *ngtcp2_pq_top(ngtcp2_pq *pq); + +/* + * Pops item at the top of the queue |pq|. The popped item is not + * freed by this function. + */ +void ngtcp2_pq_pop(ngtcp2_pq *pq); + +/* + * Returns nonzero if the queue |pq| is empty. + */ +int ngtcp2_pq_empty(ngtcp2_pq *pq); + +/* + * Returns the number of items in the queue |pq|. + */ +size_t ngtcp2_pq_size(ngtcp2_pq *pq); + +typedef int (*ngtcp2_pq_item_cb)(ngtcp2_pq_entry *item, void *arg); + +/* + * Applys |fun| to each item in |pq|. The |arg| is passed as arg + * parameter to callback function. This function must not change the + * ordering key. If the return value from callback is nonzero, this + * function returns 1 immediately without iterating remaining items. + * Otherwise this function returns 0. + */ +int ngtcp2_pq_each(ngtcp2_pq *pq, ngtcp2_pq_item_cb fun, void *arg); + +/* + * Removes |item| from priority queue. + */ +void ngtcp2_pq_remove(ngtcp2_pq *pq, ngtcp2_pq_entry *item); + +#endif /* NGTCP2_PQ_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pv.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pv.c new file mode 100644 index 0000000..314e005 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pv.c @@ -0,0 +1,172 @@ +/* + * ngtcp2 + * + * Copyright (c) 2019 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_pv.h" + +#include <string.h> +#include <assert.h> + +#include "ngtcp2_mem.h" +#include "ngtcp2_log.h" +#include "ngtcp2_macro.h" +#include "ngtcp2_addr.h" + +void ngtcp2_pv_entry_init(ngtcp2_pv_entry *pvent, const uint8_t *data, + ngtcp2_tstamp expiry, uint8_t flags) { + memcpy(pvent->data, data, sizeof(pvent->data)); + pvent->expiry = expiry; + pvent->flags = flags; +} + +int ngtcp2_pv_new(ngtcp2_pv **ppv, const ngtcp2_dcid *dcid, + ngtcp2_duration timeout, uint8_t flags, ngtcp2_log *log, + const ngtcp2_mem *mem) { + (*ppv) = ngtcp2_mem_malloc(mem, sizeof(ngtcp2_pv)); + if (*ppv == NULL) { + return NGTCP2_ERR_NOMEM; + } + + ngtcp2_static_ringbuf_pv_ents_init(&(*ppv)->ents); + + ngtcp2_dcid_copy(&(*ppv)->dcid, dcid); + + (*ppv)->mem = mem; + (*ppv)->log = log; + (*ppv)->timeout = timeout; + (*ppv)->fallback_pto = 0; + (*ppv)->started_ts = UINT64_MAX; + (*ppv)->probe_pkt_left = NGTCP2_PV_NUM_PROBE_PKT; + (*ppv)->round = 0; + (*ppv)->flags = flags; + + return 0; +} + +void ngtcp2_pv_del(ngtcp2_pv *pv) { + if (pv == NULL) { + return; + } + ngtcp2_mem_free(pv->mem, pv); +} + +void ngtcp2_pv_add_entry(ngtcp2_pv *pv, const uint8_t *data, + ngtcp2_tstamp expiry, uint8_t flags, + ngtcp2_tstamp ts) { + ngtcp2_pv_entry *ent; + + assert(pv->probe_pkt_left); + + if (ngtcp2_ringbuf_len(&pv->ents.rb) == 0) { + pv->started_ts = ts; + } + + ent = ngtcp2_ringbuf_push_back(&pv->ents.rb); + ngtcp2_pv_entry_init(ent, data, expiry, flags); + + pv->flags &= (uint8_t)~NGTCP2_PV_FLAG_CANCEL_TIMER; + --pv->probe_pkt_left; +} + +int ngtcp2_pv_validate(ngtcp2_pv *pv, uint8_t *pflags, const uint8_t *data) { + size_t len = ngtcp2_ringbuf_len(&pv->ents.rb); + size_t i; + ngtcp2_pv_entry *ent; + + if (len == 0) { + return NGTCP2_ERR_INVALID_STATE; + } + + for (i = 0; i < len; ++i) { + ent = ngtcp2_ringbuf_get(&pv->ents.rb, i); + if (memcmp(ent->data, data, sizeof(ent->data)) == 0) { + *pflags = ent->flags; + ngtcp2_log_info(pv->log, NGTCP2_LOG_EVENT_PTV, "path has been validated"); + return 0; + } + } + + return NGTCP2_ERR_INVALID_ARGUMENT; +} + +void ngtcp2_pv_handle_entry_expiry(ngtcp2_pv *pv, ngtcp2_tstamp ts) { + ngtcp2_pv_entry *ent; + + if (ngtcp2_ringbuf_len(&pv->ents.rb) == 0) { + return; + } + + ent = ngtcp2_ringbuf_get(&pv->ents.rb, ngtcp2_ringbuf_len(&pv->ents.rb) - 1); + + if (ent->expiry > ts) { + return; + } + + ++pv->round; + pv->probe_pkt_left = NGTCP2_PV_NUM_PROBE_PKT; +} + +int ngtcp2_pv_should_send_probe(ngtcp2_pv *pv) { + return pv->probe_pkt_left > 0; +} + +int ngtcp2_pv_validation_timed_out(ngtcp2_pv *pv, ngtcp2_tstamp ts) { + ngtcp2_tstamp t; + ngtcp2_pv_entry *ent; + + if (pv->started_ts == UINT64_MAX) { + return 0; + } + + assert(ngtcp2_ringbuf_len(&pv->ents.rb)); + + ent = ngtcp2_ringbuf_get(&pv->ents.rb, ngtcp2_ringbuf_len(&pv->ents.rb) - 1); + + t = pv->started_ts + pv->timeout; + t = ngtcp2_max(t, ent->expiry); + + return t <= ts; +} + +ngtcp2_tstamp ngtcp2_pv_next_expiry(ngtcp2_pv *pv) { + ngtcp2_pv_entry *ent; + + if ((pv->flags & NGTCP2_PV_FLAG_CANCEL_TIMER) || + ngtcp2_ringbuf_len(&pv->ents.rb) == 0) { + return UINT64_MAX; + } + + ent = ngtcp2_ringbuf_get(&pv->ents.rb, ngtcp2_ringbuf_len(&pv->ents.rb) - 1); + + return ent->expiry; +} + +void ngtcp2_pv_cancel_expired_timer(ngtcp2_pv *pv, ngtcp2_tstamp ts) { + ngtcp2_tstamp expiry = ngtcp2_pv_next_expiry(pv); + + if (expiry > ts) { + return; + } + + pv->flags |= NGTCP2_PV_FLAG_CANCEL_TIMER; +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pv.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pv.h new file mode 100644 index 0000000..293cbca --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pv.h @@ -0,0 +1,198 @@ +/* + * ngtcp2 + * + * Copyright (c) 2019 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_PV_H +#define NGTCP2_PV_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +#include "ngtcp2_cid.h" +#include "ngtcp2_ringbuf.h" + +/* NGTCP2_PV_MAX_ENTRIES is the maximum number of entries that + ngtcp2_pv can contain. It must be power of 2. */ +#define NGTCP2_PV_MAX_ENTRIES 8 +/* NGTCP2_PV_NUM_PROBE_PKT is the number of probe packets containing + PATH_CHALLENGE sent at a time. */ +#define NGTCP2_PV_NUM_PROBE_PKT 2 + +typedef struct ngtcp2_log ngtcp2_log; + +typedef struct ngtcp2_frame_chain ngtcp2_frame_chain; + +/* NGTCP2_PV_ENTRY_FLAG_NONE indicates that no flag is set. */ +#define NGTCP2_PV_ENTRY_FLAG_NONE 0x00u +/* NGTCP2_PV_ENTRY_FLAG_UNDERSIZED indicates that UDP datagram which + contains PATH_CHALLENGE is undersized (< 1200 bytes) */ +#define NGTCP2_PV_ENTRY_FLAG_UNDERSIZED 0x01u + +typedef struct ngtcp2_pv_entry { + /* expiry is the timestamp when this PATH_CHALLENGE expires. */ + ngtcp2_tstamp expiry; + /* flags is zero or more of NGTCP2_PV_ENTRY_FLAG_*. */ + uint8_t flags; + /* data is a byte string included in PATH_CHALLENGE. */ + uint8_t data[8]; +} ngtcp2_pv_entry; + +void ngtcp2_pv_entry_init(ngtcp2_pv_entry *pvent, const uint8_t *data, + ngtcp2_tstamp expiry, uint8_t flags); + +/* NGTCP2_PV_FLAG_NONE indicates no flag is set. */ +#define NGTCP2_PV_FLAG_NONE 0x00u +/* NGTCP2_PV_FLAG_DONT_CARE indicates that the outcome of path + validation should be ignored entirely. */ +#define NGTCP2_PV_FLAG_DONT_CARE 0x01u +/* NGTCP2_PV_FLAG_CANCEL_TIMER indicates that the expiry timer is + cancelled. */ +#define NGTCP2_PV_FLAG_CANCEL_TIMER 0x02u +/* NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE indicates that fallback DCID is + available in ngtcp2_pv. If path validation fails, fallback to the + fallback DCID. If path validation succeeds, fallback DCID is + retired if it does not equal to the current DCID. */ +#define NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE 0x04u +/* NGTCP2_PV_FLAG_MTU_PROBE indicates that a validation must probe + least MTU that QUIC requires, which is 1200 bytes. If it fails, a + path is not viable. */ +#define NGTCP2_PV_FLAG_MTU_PROBE 0x08u +/* NGTCP2_PV_FLAG_PREFERRED_ADDR indicates that client is migrating to + server's preferred address. This flag is only used by client. */ +#define NGTCP2_PV_FLAG_PREFERRED_ADDR 0x10u + +typedef struct ngtcp2_pv ngtcp2_pv; + +ngtcp2_static_ringbuf_def(pv_ents, NGTCP2_PV_MAX_ENTRIES, + sizeof(ngtcp2_pv_entry)); +/* + * ngtcp2_pv is the context of a single path validation. + */ +struct ngtcp2_pv { + const ngtcp2_mem *mem; + ngtcp2_log *log; + /* dcid is DCID and path this path validation uses. */ + ngtcp2_dcid dcid; + /* fallback_dcid is the usually validated DCID and used as a + fallback if this path validation fails. */ + ngtcp2_dcid fallback_dcid; + /* ents is the ring buffer of ngtcp2_pv_entry */ + ngtcp2_static_ringbuf_pv_ents ents; + /* timeout is the duration within which this path validation should + succeed. */ + ngtcp2_duration timeout; + /* fallback_pto is PTO of fallback connection. */ + ngtcp2_duration fallback_pto; + /* started_ts is the timestamp this path validation starts. */ + ngtcp2_tstamp started_ts; + /* round is the number of times that probe_pkt_left is reset. */ + size_t round; + /* probe_pkt_left is the number of probe packets containing + PATH_CHALLENGE which can be send without waiting for an + expiration of a previous flight. */ + size_t probe_pkt_left; + /* flags is bitwise-OR of zero or more of NGTCP2_PV_FLAG_*. */ + uint8_t flags; +}; + +/* + * ngtcp2_pv_new creates new ngtcp2_pv object and assigns its pointer + * to |*ppv|. This function makes a copy of |dcid|. |timeout| is a + * duration within which this path validation must succeed. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory + */ +int ngtcp2_pv_new(ngtcp2_pv **ppv, const ngtcp2_dcid *dcid, + ngtcp2_duration timeout, uint8_t flags, ngtcp2_log *log, + const ngtcp2_mem *mem); + +/* + * ngtcp2_pv_del deallocates |pv|. This function frees memory |pv| + * points too. + */ +void ngtcp2_pv_del(ngtcp2_pv *pv); + +/* + * ngtcp2_pv_add_entry adds new entry with |data|. |expiry| is the + * expiry time of the entry. + */ +void ngtcp2_pv_add_entry(ngtcp2_pv *pv, const uint8_t *data, + ngtcp2_tstamp expiry, uint8_t flags, ngtcp2_tstamp ts); + +/* + * ngtcp2_pv_full returns nonzero if |pv| is full of ngtcp2_pv_entry. + */ +int ngtcp2_pv_full(ngtcp2_pv *pv); + +/* + * ngtcp2_pv_validate validates that the received |data| matches the + * one of the existing entry. The flag of ngtcp2_pv_entry that + * matches |data| is assigned to |*pflags| if this function succeeds. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_PATH_VALIDATION_FAILED + * path validation has failed and must be abandoned + * NGTCP2_ERR_INVALID_STATE + * |pv| includes no entry + * NGTCP2_ERR_INVALID_ARGUMENT + * |pv| does not have an entry which has |data| and |path| + */ +int ngtcp2_pv_validate(ngtcp2_pv *pv, uint8_t *pflags, const uint8_t *data); + +/* + * ngtcp2_pv_handle_entry_expiry checks expiry of existing entries. + */ +void ngtcp2_pv_handle_entry_expiry(ngtcp2_pv *pv, ngtcp2_tstamp ts); + +/* + * ngtcp2_pv_should_send_probe returns nonzero if new entry can be + * added by ngtcp2_pv_add_entry. + */ +int ngtcp2_pv_should_send_probe(ngtcp2_pv *pv); + +/* + * ngtcp2_pv_validation_timed_out returns nonzero if the path + * validation fails because of timeout. + */ +int ngtcp2_pv_validation_timed_out(ngtcp2_pv *pv, ngtcp2_tstamp ts); + +/* + * ngtcp2_pv_next_expiry returns the earliest expiry. + */ +ngtcp2_tstamp ngtcp2_pv_next_expiry(ngtcp2_pv *pv); + +/* + * ngtcp2_pv_cancel_expired_timer cancels the expired timer. + */ +void ngtcp2_pv_cancel_expired_timer(ngtcp2_pv *pv, ngtcp2_tstamp ts); + +#endif /* NGTCP2_PV_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_qlog.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_qlog.c new file mode 100644 index 0000000..5107f44 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_qlog.c @@ -0,0 +1,1218 @@ +/* + * ngtcp2 + * + * Copyright (c) 2019 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_qlog.h" + +#include <assert.h> + +#include "ngtcp2_str.h" +#include "ngtcp2_vec.h" +#include "ngtcp2_conv.h" +#include "ngtcp2_net.h" +#include "ngtcp2_unreachable.h" + +void ngtcp2_qlog_init(ngtcp2_qlog *qlog, ngtcp2_qlog_write write, + ngtcp2_tstamp ts, void *user_data) { + qlog->write = write; + qlog->ts = qlog->last_ts = ts; + qlog->user_data = user_data; +} + +#define write_verbatim(DEST, S) ngtcp2_cpymem((DEST), (S), sizeof(S) - 1) + +static uint8_t *write_string_impl(uint8_t *p, const uint8_t *data, + size_t datalen) { + *p++ = '"'; + if (datalen) { + p = ngtcp2_cpymem(p, data, datalen); + } + *p++ = '"'; + return p; +} + +#define write_string(DEST, S) \ + write_string_impl((DEST), (const uint8_t *)(S), sizeof(S) - 1) + +#define NGTCP2_LOWER_XDIGITS "0123456789abcdef" + +static uint8_t *write_hex(uint8_t *p, const uint8_t *data, size_t datalen) { + const uint8_t *b = data, *end = data + datalen; + *p++ = '"'; + for (; b != end; ++b) { + *p++ = (uint8_t)NGTCP2_LOWER_XDIGITS[*b >> 4]; + *p++ = (uint8_t)NGTCP2_LOWER_XDIGITS[*b & 0xf]; + } + *p++ = '"'; + return p; +} + +static uint8_t *write_cid(uint8_t *p, const ngtcp2_cid *cid) { + return write_hex(p, cid->data, cid->datalen); +} + +static uint8_t *write_number(uint8_t *p, uint64_t n) { + size_t nlen = 0; + uint64_t t; + uint8_t *res; + + if (n == 0) { + *p++ = '0'; + return p; + } + for (t = n; t; t /= 10, ++nlen) + ; + p += nlen; + res = p; + for (; n; n /= 10) { + *--p = (uint8_t)((n % 10) + '0'); + } + return res; +} + +static uint8_t *write_tstamp(uint8_t *p, ngtcp2_tstamp ts) { + return write_number(p, ts / NGTCP2_MILLISECONDS); +} + +static uint8_t *write_duration(uint8_t *p, ngtcp2_duration duration) { + return write_number(p, duration / NGTCP2_MILLISECONDS); +} + +static uint8_t *write_bool(uint8_t *p, int b) { + if (b) { + return ngtcp2_cpymem(p, "true", sizeof("true") - 1); + } + return ngtcp2_cpymem(p, "false", sizeof("false") - 1); +} + +static uint8_t *write_pair_impl(uint8_t *p, const uint8_t *name, size_t namelen, + const ngtcp2_vec *value) { + p = write_string_impl(p, name, namelen); + *p++ = ':'; + return write_string_impl(p, value->base, value->len); +} + +#define write_pair(DEST, NAME, VALUE) \ + write_pair_impl((DEST), (const uint8_t *)(NAME), sizeof(NAME) - 1, (VALUE)) + +static uint8_t *write_pair_hex_impl(uint8_t *p, const uint8_t *name, + size_t namelen, const uint8_t *value, + size_t valuelen) { + p = write_string_impl(p, name, namelen); + *p++ = ':'; + return write_hex(p, value, valuelen); +} + +#define write_pair_hex(DEST, NAME, VALUE, VALUELEN) \ + write_pair_hex_impl((DEST), (const uint8_t *)(NAME), sizeof(NAME) - 1, \ + (VALUE), (VALUELEN)) + +static uint8_t *write_pair_number_impl(uint8_t *p, const uint8_t *name, + size_t namelen, uint64_t value) { + p = write_string_impl(p, name, namelen); + *p++ = ':'; + return write_number(p, value); +} + +#define write_pair_number(DEST, NAME, VALUE) \ + write_pair_number_impl((DEST), (const uint8_t *)(NAME), sizeof(NAME) - 1, \ + (VALUE)) + +static uint8_t *write_pair_duration_impl(uint8_t *p, const uint8_t *name, + size_t namelen, + ngtcp2_duration duration) { + p = write_string_impl(p, name, namelen); + *p++ = ':'; + return write_duration(p, duration); +} + +#define write_pair_duration(DEST, NAME, VALUE) \ + write_pair_duration_impl((DEST), (const uint8_t *)(NAME), sizeof(NAME) - 1, \ + (VALUE)) + +static uint8_t *write_pair_tstamp_impl(uint8_t *p, const uint8_t *name, + size_t namelen, ngtcp2_tstamp ts) { + p = write_string_impl(p, name, namelen); + *p++ = ':'; + return write_tstamp(p, ts); +} + +#define write_pair_tstamp(DEST, NAME, VALUE) \ + write_pair_tstamp_impl((DEST), (const uint8_t *)(NAME), sizeof(NAME) - 1, \ + (VALUE)) + +static uint8_t *write_pair_bool_impl(uint8_t *p, const uint8_t *name, + size_t namelen, int b) { + p = write_string_impl(p, name, namelen); + *p++ = ':'; + return write_bool(p, b); +} + +#define write_pair_bool(DEST, NAME, VALUE) \ + write_pair_bool_impl((DEST), (const uint8_t *)(NAME), sizeof(NAME) - 1, \ + (VALUE)) + +static uint8_t *write_pair_cid_impl(uint8_t *p, const uint8_t *name, + size_t namelen, const ngtcp2_cid *cid) { + p = write_string_impl(p, name, namelen); + *p++ = ':'; + return write_cid(p, cid); +} + +#define write_pair_cid(DEST, NAME, VALUE) \ + write_pair_cid_impl((DEST), (const uint8_t *)(NAME), sizeof(NAME) - 1, \ + (VALUE)) + +#define ngtcp2_make_vec_lit(S) \ + { (uint8_t *)(S), sizeof((S)) - 1 } + +static uint8_t *write_common_fields(uint8_t *p, const ngtcp2_cid *odcid) { + p = write_verbatim( + p, "\"common_fields\":{\"protocol_type\":[\"QUIC\"],\"time_format\":" + "\"relative\",\"reference_time\":0,\"group_id\":"); + p = write_cid(p, odcid); + *p++ = '}'; + return p; +} + +static uint8_t *write_trace(uint8_t *p, int server, const ngtcp2_cid *odcid) { + p = write_verbatim( + p, "\"trace\":{\"vantage_point\":{\"name\":\"ngtcp2\",\"type\":"); + if (server) { + p = write_string(p, "server"); + } else { + p = write_string(p, "client"); + } + p = write_verbatim(p, "},"); + p = write_common_fields(p, odcid); + *p++ = '}'; + return p; +} + +void ngtcp2_qlog_start(ngtcp2_qlog *qlog, const ngtcp2_cid *odcid, int server) { + uint8_t buf[1024]; + uint8_t *p = buf; + + if (!qlog->write) { + return; + } + + p = write_verbatim( + p, "\x1e{\"qlog_format\":\"JSON-SEQ\",\"qlog_version\":\"0.3\","); + p = write_trace(p, server, odcid); + p = write_verbatim(p, "}\n"); + + qlog->write(qlog->user_data, NGTCP2_QLOG_WRITE_FLAG_NONE, buf, + (size_t)(p - buf)); +} + +void ngtcp2_qlog_end(ngtcp2_qlog *qlog) { + uint8_t buf[1] = {0}; + + if (!qlog->write) { + return; + } + + qlog->write(qlog->user_data, NGTCP2_QLOG_WRITE_FLAG_FIN, &buf, 0); +} + +static ngtcp2_vec vec_pkt_type_initial = ngtcp2_make_vec_lit("initial"); +static ngtcp2_vec vec_pkt_type_handshake = ngtcp2_make_vec_lit("handshake"); +static ngtcp2_vec vec_pkt_type_0rtt = ngtcp2_make_vec_lit("0RTT"); +static ngtcp2_vec vec_pkt_type_1rtt = ngtcp2_make_vec_lit("1RTT"); +static ngtcp2_vec vec_pkt_type_retry = ngtcp2_make_vec_lit("retry"); +static ngtcp2_vec vec_pkt_type_version_negotiation = + ngtcp2_make_vec_lit("version_negotiation"); +static ngtcp2_vec vec_pkt_type_stateless_reset = + ngtcp2_make_vec_lit("stateless_reset"); +static ngtcp2_vec vec_pkt_type_unknown = ngtcp2_make_vec_lit("unknown"); + +static const ngtcp2_vec *qlog_pkt_type(const ngtcp2_pkt_hd *hd) { + if (hd->flags & NGTCP2_PKT_FLAG_LONG_FORM) { + switch (hd->type) { + case NGTCP2_PKT_INITIAL: + return &vec_pkt_type_initial; + case NGTCP2_PKT_HANDSHAKE: + return &vec_pkt_type_handshake; + case NGTCP2_PKT_0RTT: + return &vec_pkt_type_0rtt; + case NGTCP2_PKT_RETRY: + return &vec_pkt_type_retry; + default: + return &vec_pkt_type_unknown; + } + } + + switch (hd->type) { + case NGTCP2_PKT_VERSION_NEGOTIATION: + return &vec_pkt_type_version_negotiation; + case NGTCP2_PKT_STATELESS_RESET: + return &vec_pkt_type_stateless_reset; + case NGTCP2_PKT_1RTT: + return &vec_pkt_type_1rtt; + default: + return &vec_pkt_type_unknown; + } +} + +static uint8_t *write_pkt_hd(uint8_t *p, const ngtcp2_pkt_hd *hd) { + /* + * {"packet_type":"version_negotiation","packet_number":"0000000000000000000","token":{"data":""}} + */ +#define NGTCP2_QLOG_PKT_HD_OVERHEAD 95 + + *p++ = '{'; + p = write_pair(p, "packet_type", qlog_pkt_type(hd)); + *p++ = ','; + p = write_pair_number(p, "packet_number", (uint64_t)hd->pkt_num); + if (hd->type == NGTCP2_PKT_INITIAL && hd->tokenlen) { + p = write_verbatim(p, ",\"token\":{"); + p = write_pair_hex(p, "data", hd->token, hd->tokenlen); + *p++ = '}'; + } + /* TODO Write DCIL and DCID */ + /* TODO Write SCIL and SCID */ + *p++ = '}'; + return p; +} + +static uint8_t *write_padding_frame(uint8_t *p, const ngtcp2_padding *fr) { + (void)fr; + + /* {"frame_type":"padding"} */ +#define NGTCP2_QLOG_PADDING_FRAME_OVERHEAD 24 + + return write_verbatim(p, "{\"frame_type\":\"padding\"}"); +} + +static uint8_t *write_ping_frame(uint8_t *p, const ngtcp2_ping *fr) { + (void)fr; + + /* {"frame_type":"ping"} */ +#define NGTCP2_QLOG_PING_FRAME_OVERHEAD 21 + + return write_verbatim(p, "{\"frame_type\":\"ping\"}"); +} + +static uint8_t *write_ack_frame(uint8_t *p, const ngtcp2_ack *fr) { + int64_t largest_ack, min_ack; + size_t i; + const ngtcp2_ack_range *range; + + /* + * {"frame_type":"ack","ack_delay":0000000000000000000,"acked_ranges":[]} + * + * each range: + * [0000000000000000000,0000000000000000000], + * + * ecn: + * ,"ect1":0000000000000000000,"ect0":0000000000000000000,"ce":0000000000000000000 + */ +#define NGTCP2_QLOG_ACK_FRAME_BASE_OVERHEAD 70 +#define NGTCP2_QLOG_ACK_FRAME_RANGE_OVERHEAD 42 +#define NGTCP2_QLOG_ACK_FRAME_ECN_OVERHEAD 79 + + p = write_verbatim(p, "{\"frame_type\":\"ack\","); + p = write_pair_duration(p, "ack_delay", fr->ack_delay_unscaled); + p = write_verbatim(p, ",\"acked_ranges\":["); + + largest_ack = fr->largest_ack; + min_ack = fr->largest_ack - (int64_t)fr->first_ack_range; + + *p++ = '['; + p = write_number(p, (uint64_t)min_ack); + if (largest_ack != min_ack) { + *p++ = ','; + p = write_number(p, (uint64_t)largest_ack); + } + *p++ = ']'; + + for (i = 0; i < fr->rangecnt; ++i) { + range = &fr->ranges[i]; + largest_ack = min_ack - (int64_t)range->gap - 2; + min_ack = largest_ack - (int64_t)range->len; + *p++ = ','; + *p++ = '['; + p = write_number(p, (uint64_t)min_ack); + if (largest_ack != min_ack) { + *p++ = ','; + p = write_number(p, (uint64_t)largest_ack); + } + *p++ = ']'; + } + + *p++ = ']'; + + if (fr->type == NGTCP2_FRAME_ACK_ECN) { + *p++ = ','; + p = write_pair_number(p, "ect1", fr->ecn.ect1); + *p++ = ','; + p = write_pair_number(p, "ect0", fr->ecn.ect0); + *p++ = ','; + p = write_pair_number(p, "ce", fr->ecn.ce); + } + + *p++ = '}'; + + return p; +} + +static uint8_t *write_reset_stream_frame(uint8_t *p, + const ngtcp2_reset_stream *fr) { + /* + * {"frame_type":"reset_stream","stream_id":0000000000000000000,"error_code":0000000000000000000,"final_size":0000000000000000000} + */ +#define NGTCP2_QLOG_RESET_STREAM_FRAME_OVERHEAD 127 + + p = write_verbatim(p, "{\"frame_type\":\"reset_stream\","); + p = write_pair_number(p, "stream_id", (uint64_t)fr->stream_id); + *p++ = ','; + p = write_pair_number(p, "error_code", fr->app_error_code); + *p++ = ','; + p = write_pair_number(p, "final_size", fr->final_size); + *p++ = '}'; + + return p; +} + +static uint8_t *write_stop_sending_frame(uint8_t *p, + const ngtcp2_stop_sending *fr) { + /* + * {"frame_type":"stop_sending","stream_id":0000000000000000000,"error_code":0000000000000000000} + */ +#define NGTCP2_QLOG_STOP_SENDING_FRAME_OVERHEAD 94 + + p = write_verbatim(p, "{\"frame_type\":\"stop_sending\","); + p = write_pair_number(p, "stream_id", (uint64_t)fr->stream_id); + *p++ = ','; + p = write_pair_number(p, "error_code", fr->app_error_code); + *p++ = '}'; + + return p; +} + +static uint8_t *write_crypto_frame(uint8_t *p, const ngtcp2_crypto *fr) { + /* + * {"frame_type":"crypto","offset":0000000000000000000,"length":0000000000000000000} + */ +#define NGTCP2_QLOG_CRYPTO_FRAME_OVERHEAD 81 + + p = write_verbatim(p, "{\"frame_type\":\"crypto\","); + p = write_pair_number(p, "offset", fr->offset); + *p++ = ','; + p = write_pair_number(p, "length", ngtcp2_vec_len(fr->data, fr->datacnt)); + *p++ = '}'; + + return p; +} + +static uint8_t *write_new_token_frame(uint8_t *p, const ngtcp2_new_token *fr) { + /* + * {"frame_type":"new_token","length":0000000000000000000,"token":{"data":""}} + */ +#define NGTCP2_QLOG_NEW_TOKEN_FRAME_OVERHEAD 75 + + p = write_verbatim(p, "{\"frame_type\":\"new_token\","); + p = write_pair_number(p, "length", fr->tokenlen); + p = write_verbatim(p, ",\"token\":{"); + p = write_pair_hex(p, "data", fr->token, fr->tokenlen); + *p++ = '}'; + *p++ = '}'; + + return p; +} + +static uint8_t *write_stream_frame(uint8_t *p, const ngtcp2_stream *fr) { + /* + * {"frame_type":"stream","stream_id":0000000000000000000,"offset":0000000000000000000,"length":0000000000000000000,"fin":true} + */ +#define NGTCP2_QLOG_STREAM_FRAME_OVERHEAD 124 + + p = write_verbatim(p, "{\"frame_type\":\"stream\","); + p = write_pair_number(p, "stream_id", (uint64_t)fr->stream_id); + *p++ = ','; + p = write_pair_number(p, "offset", fr->offset); + *p++ = ','; + p = write_pair_number(p, "length", ngtcp2_vec_len(fr->data, fr->datacnt)); + if (fr->fin) { + *p++ = ','; + p = write_pair_bool(p, "fin", 1); + } + *p++ = '}'; + + return p; +} + +static uint8_t *write_max_data_frame(uint8_t *p, const ngtcp2_max_data *fr) { + /* + * {"frame_type":"max_data","maximum":0000000000000000000} + */ +#define NGTCP2_QLOG_MAX_DATA_FRAME_OVERHEAD 55 + + p = write_verbatim(p, "{\"frame_type\":\"max_data\","); + p = write_pair_number(p, "maximum", fr->max_data); + *p++ = '}'; + + return p; +} + +static uint8_t *write_max_stream_data_frame(uint8_t *p, + const ngtcp2_max_stream_data *fr) { + /* + * {"frame_type":"max_stream_data","stream_id":0000000000000000000,"maximum":0000000000000000000} + */ +#define NGTCP2_QLOG_MAX_STREAM_DATA_FRAME_OVERHEAD 94 + + p = write_verbatim(p, "{\"frame_type\":\"max_stream_data\","); + p = write_pair_number(p, "stream_id", (uint64_t)fr->stream_id); + *p++ = ','; + p = write_pair_number(p, "maximum", fr->max_stream_data); + *p++ = '}'; + + return p; +} + +static uint8_t *write_max_streams_frame(uint8_t *p, + const ngtcp2_max_streams *fr) { + /* + * {"frame_type":"max_streams","stream_type":"unidirectional","maximum":0000000000000000000} + */ +#define NGTCP2_QLOG_MAX_STREAMS_FRAME_OVERHEAD 89 + + p = write_verbatim(p, "{\"frame_type\":\"max_streams\",\"stream_type\":"); + if (fr->type == NGTCP2_FRAME_MAX_STREAMS_BIDI) { + p = write_string(p, "bidirectional"); + } else { + p = write_string(p, "unidirectional"); + } + *p++ = ','; + p = write_pair_number(p, "maximum", fr->max_streams); + *p++ = '}'; + + return p; +} + +static uint8_t *write_data_blocked_frame(uint8_t *p, + const ngtcp2_data_blocked *fr) { + (void)fr; + + /* + * {"frame_type":"data_blocked"} + */ +#define NGTCP2_QLOG_DATA_BLOCKED_FRAME_OVERHEAD 29 + + /* TODO log limit */ + + return write_verbatim(p, "{\"frame_type\":\"data_blocked\"}"); +} + +static uint8_t * +write_stream_data_blocked_frame(uint8_t *p, + const ngtcp2_stream_data_blocked *fr) { + (void)fr; + + /* + * {"frame_type":"stream_data_blocked"} + */ +#define NGTCP2_QLOG_STREAM_DATA_BLOCKED_FRAME_OVERHEAD 36 + + /* TODO log limit */ + + return write_verbatim(p, "{\"frame_type\":\"stream_data_blocked\"}"); +} + +static uint8_t *write_streams_blocked_frame(uint8_t *p, + const ngtcp2_streams_blocked *fr) { + (void)fr; + + /* + * {"frame_type":"streams_blocked"} + */ +#define NGTCP2_QLOG_STREAMS_BLOCKED_FRAME_OVERHEAD 32 + + /* TODO Log stream_type and limit */ + + return write_verbatim(p, "{\"frame_type\":\"streams_blocked\"}"); +} + +static uint8_t * +write_new_connection_id_frame(uint8_t *p, const ngtcp2_new_connection_id *fr) { + /* + * {"frame_type":"new_connection_id","sequence_number":0000000000000000000,"retire_prior_to":0000000000000000000,"connection_id_length":0000000000000000000,"connection_id":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","stateless_reset_token":{"data":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}} + */ +#define NGTCP2_QLOG_NEW_CONNECTION_ID_FRAME_OVERHEAD 280 + + p = write_verbatim(p, "{\"frame_type\":\"new_connection_id\","); + p = write_pair_number(p, "sequence_number", fr->seq); + *p++ = ','; + p = write_pair_number(p, "retire_prior_to", fr->retire_prior_to); + *p++ = ','; + p = write_pair_number(p, "connection_id_length", fr->cid.datalen); + *p++ = ','; + p = write_pair_cid(p, "connection_id", &fr->cid); + p = write_verbatim(p, ",\"stateless_reset_token\":{"); + p = write_pair_hex(p, "data", fr->stateless_reset_token, + sizeof(fr->stateless_reset_token)); + *p++ = '}'; + *p++ = '}'; + + return p; +} + +static uint8_t * +write_retire_connection_id_frame(uint8_t *p, + const ngtcp2_retire_connection_id *fr) { + /* + * {"frame_type":"retire_connection_id","sequence_number":0000000000000000000} + */ +#define NGTCP2_QLOG_RETIRE_CONNECTION_ID_FRAME_OVERHEAD 75 + + p = write_verbatim(p, "{\"frame_type\":\"retire_connection_id\","); + p = write_pair_number(p, "sequence_number", fr->seq); + *p++ = '}'; + + return p; +} + +static uint8_t *write_path_challenge_frame(uint8_t *p, + const ngtcp2_path_challenge *fr) { + /* + * {"frame_type":"path_challenge","data":"xxxxxxxxxxxxxxxx"} + */ +#define NGTCP2_QLOG_PATH_CHALLENGE_FRAME_OVERHEAD 57 + + p = write_verbatim(p, "{\"frame_type\":\"path_challenge\","); + p = write_pair_hex(p, "data", fr->data, sizeof(fr->data)); + *p++ = '}'; + + return p; +} + +static uint8_t *write_path_response_frame(uint8_t *p, + const ngtcp2_path_response *fr) { + /* + * {"frame_type":"path_response","data":"xxxxxxxxxxxxxxxx"} + */ +#define NGTCP2_QLOG_PATH_RESPONSE_FRAME_OVERHEAD 56 + + p = write_verbatim(p, "{\"frame_type\":\"path_response\","); + p = write_pair_hex(p, "data", fr->data, sizeof(fr->data)); + *p++ = '}'; + + return p; +} + +static uint8_t * +write_connection_close_frame(uint8_t *p, const ngtcp2_connection_close *fr) { + /* + * {"frame_type":"connection_close","error_space":"application","error_code":0000000000000000000,"raw_error_code":0000000000000000000} + */ +#define NGTCP2_QLOG_CONNECTION_CLOSE_FRAME_OVERHEAD 131 + + p = write_verbatim(p, + "{\"frame_type\":\"connection_close\",\"error_space\":"); + if (fr->type == NGTCP2_FRAME_CONNECTION_CLOSE) { + p = write_string(p, "transport"); + } else { + p = write_string(p, "application"); + } + *p++ = ','; + p = write_pair_number(p, "error_code", fr->error_code); + *p++ = ','; + p = write_pair_number(p, "raw_error_code", fr->error_code); + /* TODO Write reason by escaping non-printables */ + /* TODO Write trigger_frame_type */ + *p++ = '}'; + + return p; +} + +static uint8_t *write_handshake_done_frame(uint8_t *p, + const ngtcp2_handshake_done *fr) { + (void)fr; + + /* + * {"frame_type":"handshake_done"} + */ +#define NGTCP2_QLOG_HANDSHAKE_DONE_FRAME_OVERHEAD 31 + + return write_verbatim(p, "{\"frame_type\":\"handshake_done\"}"); +} + +static uint8_t *write_datagram_frame(uint8_t *p, const ngtcp2_datagram *fr) { + /* + * {"frame_type":"datagram","length":0000000000000000000} + */ +#define NGTCP2_QLOG_DATAGRAM_FRAME_OVERHEAD 54 + + p = write_verbatim(p, "{\"frame_type\":\"datagram\","); + p = write_pair_number(p, "length", ngtcp2_vec_len(fr->data, fr->datacnt)); + *p++ = '}'; + + return p; +} + +static uint8_t *qlog_write_time(ngtcp2_qlog *qlog, uint8_t *p) { + return write_pair_tstamp(p, "time", qlog->last_ts - qlog->ts); +} + +static void qlog_pkt_write_start(ngtcp2_qlog *qlog, int sent) { + uint8_t *p; + + if (!qlog->write) { + return; + } + + ngtcp2_buf_reset(&qlog->buf); + p = qlog->buf.last; + + *p++ = '\x1e'; + *p++ = '{'; + p = qlog_write_time(qlog, p); + p = write_verbatim(p, ",\"name\":"); + if (sent) { + p = write_string(p, "transport:packet_sent"); + } else { + p = write_string(p, "transport:packet_received"); + } + p = write_verbatim(p, ",\"data\":{\"frames\":["); + qlog->buf.last = p; +} + +static void qlog_pkt_write_end(ngtcp2_qlog *qlog, const ngtcp2_pkt_hd *hd, + size_t pktlen) { + uint8_t *p = qlog->buf.last; + + if (!qlog->write) { + return; + } + + /* + * ],"header":,"raw":{"length":0000000000000000000}}} + * + * plus, terminating LF + */ +#define NGTCP2_QLOG_PKT_WRITE_END_OVERHEAD \ + (1 + 50 + NGTCP2_QLOG_PKT_HD_OVERHEAD) + + if (ngtcp2_buf_left(&qlog->buf) < + NGTCP2_QLOG_PKT_WRITE_END_OVERHEAD + hd->tokenlen * 2) { + return; + } + + assert(ngtcp2_buf_len(&qlog->buf)); + + /* Eat last ',' */ + if (*(p - 1) == ',') { + --p; + } + + p = write_verbatim(p, "],\"header\":"); + p = write_pkt_hd(p, hd); + p = write_verbatim(p, ",\"raw\":{\"length\":"); + p = write_number(p, pktlen); + p = write_verbatim(p, "}}}\n"); + + qlog->buf.last = p; + + qlog->write(qlog->user_data, NGTCP2_QLOG_WRITE_FLAG_NONE, qlog->buf.pos, + ngtcp2_buf_len(&qlog->buf)); +} + +void ngtcp2_qlog_write_frame(ngtcp2_qlog *qlog, const ngtcp2_frame *fr) { + uint8_t *p = qlog->buf.last; + + if (!qlog->write) { + return; + } + + switch (fr->type) { + case NGTCP2_FRAME_PADDING: + if (ngtcp2_buf_left(&qlog->buf) < NGTCP2_QLOG_PADDING_FRAME_OVERHEAD + 1) { + return; + } + p = write_padding_frame(p, &fr->padding); + break; + case NGTCP2_FRAME_PING: + if (ngtcp2_buf_left(&qlog->buf) < NGTCP2_QLOG_PING_FRAME_OVERHEAD + 1) { + return; + } + p = write_ping_frame(p, &fr->ping); + break; + case NGTCP2_FRAME_ACK: + case NGTCP2_FRAME_ACK_ECN: + if (ngtcp2_buf_left(&qlog->buf) < + NGTCP2_QLOG_ACK_FRAME_BASE_OVERHEAD + + (size_t)(fr->type == NGTCP2_FRAME_ACK_ECN + ? NGTCP2_QLOG_ACK_FRAME_ECN_OVERHEAD + : 0) + + NGTCP2_QLOG_ACK_FRAME_RANGE_OVERHEAD * (1 + fr->ack.rangecnt) + 1) { + return; + } + p = write_ack_frame(p, &fr->ack); + break; + case NGTCP2_FRAME_RESET_STREAM: + if (ngtcp2_buf_left(&qlog->buf) < + NGTCP2_QLOG_RESET_STREAM_FRAME_OVERHEAD + 1) { + return; + } + p = write_reset_stream_frame(p, &fr->reset_stream); + break; + case NGTCP2_FRAME_STOP_SENDING: + if (ngtcp2_buf_left(&qlog->buf) < + NGTCP2_QLOG_STOP_SENDING_FRAME_OVERHEAD + 1) { + return; + } + p = write_stop_sending_frame(p, &fr->stop_sending); + break; + case NGTCP2_FRAME_CRYPTO: + if (ngtcp2_buf_left(&qlog->buf) < NGTCP2_QLOG_CRYPTO_FRAME_OVERHEAD + 1) { + return; + } + p = write_crypto_frame(p, &fr->crypto); + break; + case NGTCP2_FRAME_NEW_TOKEN: + if (ngtcp2_buf_left(&qlog->buf) < + NGTCP2_QLOG_NEW_TOKEN_FRAME_OVERHEAD + fr->new_token.tokenlen * 2 + 1) { + return; + } + p = write_new_token_frame(p, &fr->new_token); + break; + case NGTCP2_FRAME_STREAM: + if (ngtcp2_buf_left(&qlog->buf) < NGTCP2_QLOG_STREAM_FRAME_OVERHEAD + 1) { + return; + } + p = write_stream_frame(p, &fr->stream); + break; + case NGTCP2_FRAME_MAX_DATA: + if (ngtcp2_buf_left(&qlog->buf) < NGTCP2_QLOG_MAX_DATA_FRAME_OVERHEAD + 1) { + return; + } + p = write_max_data_frame(p, &fr->max_data); + break; + case NGTCP2_FRAME_MAX_STREAM_DATA: + if (ngtcp2_buf_left(&qlog->buf) < + NGTCP2_QLOG_MAX_STREAM_DATA_FRAME_OVERHEAD + 1) { + return; + } + p = write_max_stream_data_frame(p, &fr->max_stream_data); + break; + case NGTCP2_FRAME_MAX_STREAMS_BIDI: + case NGTCP2_FRAME_MAX_STREAMS_UNI: + if (ngtcp2_buf_left(&qlog->buf) < + NGTCP2_QLOG_MAX_STREAMS_FRAME_OVERHEAD + 1) { + return; + } + p = write_max_streams_frame(p, &fr->max_streams); + break; + case NGTCP2_FRAME_DATA_BLOCKED: + if (ngtcp2_buf_left(&qlog->buf) < + NGTCP2_QLOG_DATA_BLOCKED_FRAME_OVERHEAD + 1) { + return; + } + p = write_data_blocked_frame(p, &fr->data_blocked); + break; + case NGTCP2_FRAME_STREAM_DATA_BLOCKED: + if (ngtcp2_buf_left(&qlog->buf) < + NGTCP2_QLOG_STREAM_DATA_BLOCKED_FRAME_OVERHEAD + 1) { + return; + } + p = write_stream_data_blocked_frame(p, &fr->stream_data_blocked); + break; + case NGTCP2_FRAME_STREAMS_BLOCKED_BIDI: + case NGTCP2_FRAME_STREAMS_BLOCKED_UNI: + if (ngtcp2_buf_left(&qlog->buf) < + NGTCP2_QLOG_STREAMS_BLOCKED_FRAME_OVERHEAD + 1) { + return; + } + p = write_streams_blocked_frame(p, &fr->streams_blocked); + break; + case NGTCP2_FRAME_NEW_CONNECTION_ID: + if (ngtcp2_buf_left(&qlog->buf) < + NGTCP2_QLOG_NEW_CONNECTION_ID_FRAME_OVERHEAD + 1) { + return; + } + p = write_new_connection_id_frame(p, &fr->new_connection_id); + break; + case NGTCP2_FRAME_RETIRE_CONNECTION_ID: + if (ngtcp2_buf_left(&qlog->buf) < + NGTCP2_QLOG_RETIRE_CONNECTION_ID_FRAME_OVERHEAD + 1) { + return; + } + p = write_retire_connection_id_frame(p, &fr->retire_connection_id); + break; + case NGTCP2_FRAME_PATH_CHALLENGE: + if (ngtcp2_buf_left(&qlog->buf) < + NGTCP2_QLOG_PATH_CHALLENGE_FRAME_OVERHEAD + 1) { + return; + } + p = write_path_challenge_frame(p, &fr->path_challenge); + break; + case NGTCP2_FRAME_PATH_RESPONSE: + if (ngtcp2_buf_left(&qlog->buf) < + NGTCP2_QLOG_PATH_RESPONSE_FRAME_OVERHEAD + 1) { + return; + } + p = write_path_response_frame(p, &fr->path_response); + break; + case NGTCP2_FRAME_CONNECTION_CLOSE: + case NGTCP2_FRAME_CONNECTION_CLOSE_APP: + if (ngtcp2_buf_left(&qlog->buf) < + NGTCP2_QLOG_CONNECTION_CLOSE_FRAME_OVERHEAD + 1) { + return; + } + p = write_connection_close_frame(p, &fr->connection_close); + break; + case NGTCP2_FRAME_HANDSHAKE_DONE: + if (ngtcp2_buf_left(&qlog->buf) < + NGTCP2_QLOG_HANDSHAKE_DONE_FRAME_OVERHEAD + 1) { + return; + } + p = write_handshake_done_frame(p, &fr->handshake_done); + break; + case NGTCP2_FRAME_DATAGRAM: + case NGTCP2_FRAME_DATAGRAM_LEN: + if (ngtcp2_buf_left(&qlog->buf) < NGTCP2_QLOG_DATAGRAM_FRAME_OVERHEAD + 1) { + return; + } + p = write_datagram_frame(p, &fr->datagram); + break; + default: + ngtcp2_unreachable(); + } + + *p++ = ','; + + qlog->buf.last = p; +} + +void ngtcp2_qlog_pkt_received_start(ngtcp2_qlog *qlog) { + qlog_pkt_write_start(qlog, /* sent = */ 0); +} + +void ngtcp2_qlog_pkt_received_end(ngtcp2_qlog *qlog, const ngtcp2_pkt_hd *hd, + size_t pktlen) { + qlog_pkt_write_end(qlog, hd, pktlen); +} + +void ngtcp2_qlog_pkt_sent_start(ngtcp2_qlog *qlog) { + qlog_pkt_write_start(qlog, /* sent = */ 1); +} + +void ngtcp2_qlog_pkt_sent_end(ngtcp2_qlog *qlog, const ngtcp2_pkt_hd *hd, + size_t pktlen) { + qlog_pkt_write_end(qlog, hd, pktlen); +} + +void ngtcp2_qlog_parameters_set_transport_params( + ngtcp2_qlog *qlog, const ngtcp2_transport_params *params, int server, + ngtcp2_qlog_side side) { + uint8_t buf[1024]; + uint8_t *p = buf; + const ngtcp2_preferred_addr *paddr; + const ngtcp2_sockaddr_in *sa_in; + const ngtcp2_sockaddr_in6 *sa_in6; + + if (!qlog->write) { + return; + } + + *p++ = '\x1e'; + *p++ = '{'; + p = qlog_write_time(qlog, p); + p = write_verbatim( + p, ",\"name\":\"transport:parameters_set\",\"data\":{\"owner\":"); + + if (side == NGTCP2_QLOG_SIDE_LOCAL) { + p = write_string(p, "local"); + } else { + p = write_string(p, "remote"); + } + + *p++ = ','; + p = write_pair_cid(p, "initial_source_connection_id", ¶ms->initial_scid); + *p++ = ','; + if (side == (server ? NGTCP2_QLOG_SIDE_LOCAL : NGTCP2_QLOG_SIDE_REMOTE)) { + p = write_pair_cid(p, "original_destination_connection_id", + ¶ms->original_dcid); + *p++ = ','; + } + if (params->retry_scid_present) { + p = write_pair_cid(p, "retry_source_connection_id", ¶ms->retry_scid); + *p++ = ','; + } + if (params->stateless_reset_token_present) { + p = write_verbatim(p, "\"stateless_reset_token\":{"); + p = write_pair_hex(p, "data", params->stateless_reset_token, + sizeof(params->stateless_reset_token)); + *p++ = '}'; + *p++ = ','; + } + p = write_pair_bool(p, "disable_active_migration", + params->disable_active_migration); + *p++ = ','; + p = write_pair_duration(p, "max_idle_timeout", params->max_idle_timeout); + *p++ = ','; + p = write_pair_number(p, "max_udp_payload_size", + params->max_udp_payload_size); + *p++ = ','; + p = write_pair_number(p, "ack_delay_exponent", params->ack_delay_exponent); + *p++ = ','; + p = write_pair_duration(p, "max_ack_delay", params->max_ack_delay); + *p++ = ','; + p = write_pair_number(p, "active_connection_id_limit", + params->active_connection_id_limit); + *p++ = ','; + p = write_pair_number(p, "initial_max_data", params->initial_max_data); + *p++ = ','; + p = write_pair_number(p, "initial_max_stream_data_bidi_local", + params->initial_max_stream_data_bidi_local); + *p++ = ','; + p = write_pair_number(p, "initial_max_stream_data_bidi_remote", + params->initial_max_stream_data_bidi_remote); + *p++ = ','; + p = write_pair_number(p, "initial_max_stream_data_uni", + params->initial_max_stream_data_uni); + *p++ = ','; + p = write_pair_number(p, "initial_max_streams_bidi", + params->initial_max_streams_bidi); + *p++ = ','; + p = write_pair_number(p, "initial_max_streams_uni", + params->initial_max_streams_uni); + if (params->preferred_address_present) { + *p++ = ','; + paddr = ¶ms->preferred_address; + p = write_string(p, "preferred_address"); + *p++ = ':'; + *p++ = '{'; + + if (paddr->ipv4_present) { + sa_in = &paddr->ipv4; + + p = write_pair_hex(p, "ip_v4", (const uint8_t *)&sa_in->sin_addr, + sizeof(sa_in->sin_addr)); + *p++ = ','; + p = write_pair_number(p, "port_v4", ngtcp2_ntohs(sa_in->sin_port)); + *p++ = ','; + } + + if (paddr->ipv6_present) { + sa_in6 = &paddr->ipv6; + + p = write_pair_hex(p, "ip_v6", (const uint8_t *)&sa_in6->sin6_addr, + sizeof(sa_in6->sin6_addr)); + *p++ = ','; + p = write_pair_number(p, "port_v6", ngtcp2_ntohs(sa_in6->sin6_port)); + *p++ = ','; + } + + p = write_pair_cid(p, "connection_id", &paddr->cid); + p = write_verbatim(p, ",\"stateless_reset_token\":{"); + p = write_pair_hex(p, "data", paddr->stateless_reset_token, + sizeof(paddr->stateless_reset_token)); + *p++ = '}'; + *p++ = '}'; + } + *p++ = ','; + p = write_pair_number(p, "max_datagram_frame_size", + params->max_datagram_frame_size); + *p++ = ','; + p = write_pair_bool(p, "grease_quic_bit", params->grease_quic_bit); + p = write_verbatim(p, "}}\n"); + + qlog->write(qlog->user_data, NGTCP2_QLOG_WRITE_FLAG_NONE, buf, + (size_t)(p - buf)); +} + +void ngtcp2_qlog_metrics_updated(ngtcp2_qlog *qlog, + const ngtcp2_conn_stat *cstat) { + uint8_t buf[1024]; + uint8_t *p = buf; + + if (!qlog->write) { + return; + } + + *p++ = '\x1e'; + *p++ = '{'; + p = qlog_write_time(qlog, p); + p = write_verbatim(p, ",\"name\":\"recovery:metrics_updated\",\"data\":{"); + + if (cstat->min_rtt != UINT64_MAX) { + p = write_pair_duration(p, "min_rtt", cstat->min_rtt); + *p++ = ','; + } + p = write_pair_duration(p, "smoothed_rtt", cstat->smoothed_rtt); + *p++ = ','; + p = write_pair_duration(p, "latest_rtt", cstat->latest_rtt); + *p++ = ','; + p = write_pair_duration(p, "rtt_variance", cstat->rttvar); + *p++ = ','; + p = write_pair_number(p, "pto_count", cstat->pto_count); + *p++ = ','; + p = write_pair_number(p, "congestion_window", cstat->cwnd); + *p++ = ','; + p = write_pair_number(p, "bytes_in_flight", cstat->bytes_in_flight); + if (cstat->ssthresh != UINT64_MAX) { + *p++ = ','; + p = write_pair_number(p, "ssthresh", cstat->ssthresh); + } + + p = write_verbatim(p, "}}\n"); + + qlog->write(qlog->user_data, NGTCP2_QLOG_WRITE_FLAG_NONE, buf, + (size_t)(p - buf)); +} + +void ngtcp2_qlog_pkt_lost(ngtcp2_qlog *qlog, ngtcp2_rtb_entry *ent) { + uint8_t buf[256]; + uint8_t *p = buf; + ngtcp2_pkt_hd hd = {0}; + + if (!qlog->write) { + return; + } + + *p++ = '\x1e'; + *p++ = '{'; + p = qlog_write_time(qlog, p); + p = write_verbatim( + p, ",\"name\":\"recovery:packet_lost\",\"data\":{\"header\":"); + + hd.type = ent->hd.type; + hd.flags = ent->hd.flags; + hd.pkt_num = ent->hd.pkt_num; + + p = write_pkt_hd(p, &hd); + p = write_verbatim(p, "}}\n"); + + qlog->write(qlog->user_data, NGTCP2_QLOG_WRITE_FLAG_NONE, buf, + (size_t)(p - buf)); +} + +void ngtcp2_qlog_retry_pkt_received(ngtcp2_qlog *qlog, const ngtcp2_pkt_hd *hd, + const ngtcp2_pkt_retry *retry) { + uint8_t rawbuf[1024]; + ngtcp2_buf buf; + + if (!qlog->write) { + return; + } + + ngtcp2_buf_init(&buf, rawbuf, sizeof(rawbuf)); + + *buf.last++ = '\x1e'; + *buf.last++ = '{'; + buf.last = qlog_write_time(qlog, buf.last); + buf.last = write_verbatim( + buf.last, + ",\"name\":\"transport:packet_received\",\"data\":{\"header\":"); + + if (ngtcp2_buf_left(&buf) < + NGTCP2_QLOG_PKT_HD_OVERHEAD + hd->tokenlen * 2 + + sizeof(",\"retry_token\":{\"data\":\"\"}}}\n") - 1 + + retry->tokenlen * 2) { + return; + } + + buf.last = write_pkt_hd(buf.last, hd); + buf.last = write_verbatim(buf.last, ",\"retry_token\":{"); + buf.last = write_pair_hex(buf.last, "data", retry->token, retry->tokenlen); + buf.last = write_verbatim(buf.last, "}}}\n"); + + qlog->write(qlog->user_data, NGTCP2_QLOG_WRITE_FLAG_NONE, buf.pos, + ngtcp2_buf_len(&buf)); +} + +void ngtcp2_qlog_stateless_reset_pkt_received( + ngtcp2_qlog *qlog, const ngtcp2_pkt_stateless_reset *sr) { + uint8_t buf[256]; + uint8_t *p = buf; + ngtcp2_pkt_hd hd = {0}; + + if (!qlog->write) { + return; + } + + hd.type = NGTCP2_PKT_STATELESS_RESET; + + *p++ = '\x1e'; + *p++ = '{'; + p = qlog_write_time(qlog, p); + p = write_verbatim( + p, ",\"name\":\"transport:packet_received\",\"data\":{\"header\":"); + p = write_pkt_hd(p, &hd); + *p++ = ','; + p = write_pair_hex(p, "stateless_reset_token", sr->stateless_reset_token, + NGTCP2_STATELESS_RESET_TOKENLEN); + p = write_verbatim(p, "}}\n"); + + qlog->write(qlog->user_data, NGTCP2_QLOG_WRITE_FLAG_NONE, buf, + (size_t)(p - buf)); +} + +void ngtcp2_qlog_version_negotiation_pkt_received(ngtcp2_qlog *qlog, + const ngtcp2_pkt_hd *hd, + const uint32_t *sv, + size_t nsv) { + uint8_t rawbuf[512]; + ngtcp2_buf buf; + size_t i; + uint32_t v; + + if (!qlog->write) { + return; + } + + ngtcp2_buf_init(&buf, rawbuf, sizeof(rawbuf)); + + *buf.last++ = '\x1e'; + *buf.last++ = '{'; + buf.last = qlog_write_time(qlog, buf.last); + buf.last = write_verbatim( + buf.last, + ",\"name\":\"transport:packet_received\",\"data\":{\"header\":"); + buf.last = write_pkt_hd(buf.last, hd); + buf.last = write_verbatim(buf.last, ",\"supported_versions\":["); + + if (nsv) { + if (ngtcp2_buf_left(&buf) < + (sizeof("\"xxxxxxxx\",") - 1) * nsv - 1 + sizeof("]}}\n") - 1) { + return; + } + + v = ngtcp2_htonl(sv[0]); + buf.last = write_hex(buf.last, (const uint8_t *)&v, sizeof(v)); + + for (i = 1; i < nsv; ++i) { + *buf.last++ = ','; + v = ngtcp2_htonl(sv[i]); + buf.last = write_hex(buf.last, (const uint8_t *)&v, sizeof(v)); + } + } + + buf.last = write_verbatim(buf.last, "]}}\n"); + + qlog->write(qlog->user_data, NGTCP2_QLOG_WRITE_FLAG_NONE, buf.pos, + ngtcp2_buf_len(&buf)); +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_qlog.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_qlog.h new file mode 100644 index 0000000..b9107c0 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_qlog.h @@ -0,0 +1,161 @@ +/* + * ngtcp2 + * + * Copyright (c) 2019 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_QLOG_H +#define NGTCP2_QLOG_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +#include "ngtcp2_pkt.h" +#include "ngtcp2_cc.h" +#include "ngtcp2_buf.h" +#include "ngtcp2_rtb.h" + +/* NGTCP2_QLOG_BUFLEN is the length of heap allocated buffer for + qlog. */ +#define NGTCP2_QLOG_BUFLEN 4096 + +typedef enum ngtcp2_qlog_side { + NGTCP2_QLOG_SIDE_LOCAL, + NGTCP2_QLOG_SIDE_REMOTE, +} ngtcp2_qlog_side; + +typedef struct ngtcp2_qlog { + /* write is a callback function to write qlog. */ + ngtcp2_qlog_write write; + /* ts is the initial timestamp */ + ngtcp2_tstamp ts; + /* last_ts is the timestamp observed last time. */ + ngtcp2_tstamp last_ts; + /* buf is a heap allocated buffer to write exclusively + packet_received and packet_sent. */ + ngtcp2_buf buf; + /* user_data is an opaque pointer which is passed to write + callback. */ + void *user_data; +} ngtcp2_qlog; + +/* + * ngtcp2_qlog_init initializes |qlog|. + */ +void ngtcp2_qlog_init(ngtcp2_qlog *qlog, ngtcp2_qlog_write write, + ngtcp2_tstamp ts, void *user_data); + +/* + * ngtcp2_qlog_start writes qlog preamble. + */ +void ngtcp2_qlog_start(ngtcp2_qlog *qlog, const ngtcp2_cid *odcid, int server); + +/* + * ngtcp2_qlog_end writes closing part of qlog. + */ +void ngtcp2_qlog_end(ngtcp2_qlog *qlog); + +/* + * ngtcp2_qlog_write_frame writes |fr| to qlog->buf. + * ngtcp2_qlog_pkt_received_start or ngtcp2_qlog_pkt_sent_start must + * be called before calling this function. + */ +void ngtcp2_qlog_write_frame(ngtcp2_qlog *qlog, const ngtcp2_frame *fr); + +/* + * ngtcp2_qlog_pkt_received_start starts to write packet_received + * event. It initializes qlog->buf. It writes qlog to qlog->buf. + * ngtcp2_qlog_pkt_received_end will flush the content of qlog->buf to + * write callback. + */ +void ngtcp2_qlog_pkt_received_start(ngtcp2_qlog *qlog); + +/* + * ngtcp2_qlog_pkt_received_end ends packet_received event and sends + * the content of qlog->buf to qlog->write callback. + */ +void ngtcp2_qlog_pkt_received_end(ngtcp2_qlog *qlog, const ngtcp2_pkt_hd *hd, + size_t pktlen); + +/* + * ngtcp2_qlog_pkt_sent_start starts to write packet_sent event. It + * initializes qlog->buf. It writes qlog to qlog->buf. + * ngtcp2_qlog_pkt_sent_end will flush the content of qlog->buf to + * write callback. + */ +void ngtcp2_qlog_pkt_sent_start(ngtcp2_qlog *qlog); + +/* + * ngtcp2_qlog_pkt_sent_end ends packet_sent event and sends the + * content of qlog->buf to qlog->write callback. + */ +void ngtcp2_qlog_pkt_sent_end(ngtcp2_qlog *qlog, const ngtcp2_pkt_hd *hd, + size_t pktlen); + +/* + * ngtcp2_qlog_parameters_set_transport_params writes |params| to qlog + * as parameters_set event. |server| is nonzero if the local endpoint + * is server. If |local| is nonzero, it is "owner" field becomes + * "local", otherwise "remote". + */ +void ngtcp2_qlog_parameters_set_transport_params( + ngtcp2_qlog *qlog, const ngtcp2_transport_params *params, int server, + ngtcp2_qlog_side side); + +/* + * ngtcp2_qlog_metrics_updated writes metrics_updated event of + * recovery category. + */ +void ngtcp2_qlog_metrics_updated(ngtcp2_qlog *qlog, + const ngtcp2_conn_stat *cstat); + +/* + * ngtcp2_qlog_pkt_lost writes packet_lost event. + */ +void ngtcp2_qlog_pkt_lost(ngtcp2_qlog *qlog, ngtcp2_rtb_entry *ent); + +/* + * ngtcp2_qlog_retry_pkt_received writes packet_received event for a + * received Retry packet. + */ +void ngtcp2_qlog_retry_pkt_received(ngtcp2_qlog *qlog, const ngtcp2_pkt_hd *hd, + const ngtcp2_pkt_retry *retry); + +/* + * ngtcp2_qlog_stateless_reset_pkt_received writes packet_received + * event for a received Stateless Reset packet. + */ +void ngtcp2_qlog_stateless_reset_pkt_received( + ngtcp2_qlog *qlog, const ngtcp2_pkt_stateless_reset *sr); + +/* + * ngtcp2_qlog_version_negotiation_pkt_received writes packet_received + * event for a received Version Negotiation packet. + */ +void ngtcp2_qlog_version_negotiation_pkt_received(ngtcp2_qlog *qlog, + const ngtcp2_pkt_hd *hd, + const uint32_t *sv, + size_t nsv); + +#endif /* NGTCP2_QLOG_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_range.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_range.c new file mode 100644 index 0000000..9379496 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_range.c @@ -0,0 +1,61 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_range.h" +#include "ngtcp2_macro.h" + +void ngtcp2_range_init(ngtcp2_range *r, uint64_t begin, uint64_t end) { + r->begin = begin; + r->end = end; +} + +ngtcp2_range ngtcp2_range_intersect(const ngtcp2_range *a, + const ngtcp2_range *b) { + ngtcp2_range r = {0, 0}; + uint64_t begin = ngtcp2_max(a->begin, b->begin); + uint64_t end = ngtcp2_min(a->end, b->end); + if (begin < end) { + ngtcp2_range_init(&r, begin, end); + } + return r; +} + +uint64_t ngtcp2_range_len(const ngtcp2_range *r) { return r->end - r->begin; } + +int ngtcp2_range_eq(const ngtcp2_range *a, const ngtcp2_range *b) { + return a->begin == b->begin && a->end == b->end; +} + +void ngtcp2_range_cut(ngtcp2_range *left, ngtcp2_range *right, + const ngtcp2_range *a, const ngtcp2_range *b) { + /* Assume that b is included in a */ + left->begin = a->begin; + left->end = b->begin; + right->begin = b->end; + right->end = a->end; +} + +int ngtcp2_range_not_after(const ngtcp2_range *a, const ngtcp2_range *b) { + return a->end <= b->end; +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_range.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_range.h new file mode 100644 index 0000000..a776c4e --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_range.h @@ -0,0 +1,80 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_RANGE_H +#define NGTCP2_RANGE_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +/* + * ngtcp2_range represents half-closed range [begin, end). + */ +typedef struct ngtcp2_range { + uint64_t begin; + uint64_t end; +} ngtcp2_range; + +/* + * ngtcp2_range_init initializes |r| with the range [|begin|, |end|). + */ +void ngtcp2_range_init(ngtcp2_range *r, uint64_t begin, uint64_t end); + +/* + * ngtcp2_range_intersect returns the intersection of |a| and |b|. If + * they do not overlap, it returns empty range. + */ +ngtcp2_range ngtcp2_range_intersect(const ngtcp2_range *a, + const ngtcp2_range *b); + +/* + * ngtcp2_range_len returns the length of |r|. + */ +uint64_t ngtcp2_range_len(const ngtcp2_range *r); + +/* + * ngtcp2_range_eq returns nonzero if |a| equals |b|, such that + * a->begin == b->begin, and a->end == b->end hold. + */ +int ngtcp2_range_eq(const ngtcp2_range *a, const ngtcp2_range *b); + +/* + * ngtcp2_range_cut returns the left and right range after removing + * |b| from |a|. This function assumes that |a| completely includes + * |b|. In other words, a->begin <= b->begin and b->end <= a->end + * hold. + */ +void ngtcp2_range_cut(ngtcp2_range *left, ngtcp2_range *right, + const ngtcp2_range *a, const ngtcp2_range *b); + +/* + * ngtcp2_range_not_after returns nonzero if the right edge of |a| + * does not go beyond of the right edge of |b|. + */ +int ngtcp2_range_not_after(const ngtcp2_range *a, const ngtcp2_range *b); + +#endif /* NGTCP2_RANGE_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rcvry.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rcvry.h new file mode 100644 index 0000000..4cb4088 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rcvry.h @@ -0,0 +1,40 @@ +/* + * ngtcp2 + * + * Copyright (c) 2019 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_RCVRY_H +#define NGTCP2_RCVRY_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +/* NGTCP2_PKT_THRESHOLD is kPacketThreshold described in RFC 9002. */ +#define NGTCP2_PKT_THRESHOLD 3 + +/* NGTCP2_GRANULARITY is kGranularity described in RFC 9002. */ +#define NGTCP2_GRANULARITY NGTCP2_MILLISECONDS + +#endif /* NGTCP2_RCVRY_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ringbuf.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ringbuf.c new file mode 100644 index 0000000..a6b3f73 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ringbuf.c @@ -0,0 +1,120 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_ringbuf.h" + +#include <assert.h> +#ifdef WIN32 +# include <intrin.h> +#endif + +#include "ngtcp2_macro.h" + +#if defined(_MSC_VER) && defined(_M_ARM64) +unsigned int __popcnt(unsigned int x) { + unsigned int c = 0; + for (; x; ++c) { + x &= x - 1; + } + return c; +} +#endif + +int ngtcp2_ringbuf_init(ngtcp2_ringbuf *rb, size_t nmemb, size_t size, + const ngtcp2_mem *mem) { + uint8_t *buf = ngtcp2_mem_malloc(mem, nmemb * size); + if (buf == NULL) { + return NGTCP2_ERR_NOMEM; + } + + ngtcp2_ringbuf_buf_init(rb, nmemb, size, buf, mem); + + return 0; +} + +void ngtcp2_ringbuf_buf_init(ngtcp2_ringbuf *rb, size_t nmemb, size_t size, + uint8_t *buf, const ngtcp2_mem *mem) { +#ifdef WIN32 + assert(1 == __popcnt((unsigned int)nmemb)); +#else + assert(1 == __builtin_popcount((unsigned int)nmemb)); +#endif + + rb->buf = buf; + rb->mem = mem; + rb->nmemb = nmemb; + rb->size = size; + rb->first = 0; + rb->len = 0; +} + +void ngtcp2_ringbuf_free(ngtcp2_ringbuf *rb) { + if (rb == NULL) { + return; + } + + ngtcp2_mem_free(rb->mem, rb->buf); +} + +void *ngtcp2_ringbuf_push_front(ngtcp2_ringbuf *rb) { + rb->first = (rb->first - 1) & (rb->nmemb - 1); + rb->len = ngtcp2_min(rb->nmemb, rb->len + 1); + + return (void *)&rb->buf[rb->first * rb->size]; +} + +void *ngtcp2_ringbuf_push_back(ngtcp2_ringbuf *rb) { + size_t offset = (rb->first + rb->len) & (rb->nmemb - 1); + + if (rb->len == rb->nmemb) { + rb->first = (rb->first + 1) & (rb->nmemb - 1); + } else { + ++rb->len; + } + + return (void *)&rb->buf[offset * rb->size]; +} + +void ngtcp2_ringbuf_pop_front(ngtcp2_ringbuf *rb) { + rb->first = (rb->first + 1) & (rb->nmemb - 1); + --rb->len; +} + +void ngtcp2_ringbuf_pop_back(ngtcp2_ringbuf *rb) { + assert(rb->len); + --rb->len; +} + +void ngtcp2_ringbuf_resize(ngtcp2_ringbuf *rb, size_t len) { + assert(len <= rb->nmemb); + rb->len = len; +} + +void *ngtcp2_ringbuf_get(ngtcp2_ringbuf *rb, size_t offset) { + assert(offset < rb->len); + offset = (rb->first + offset) & (rb->nmemb - 1); + return &rb->buf[offset * rb->size]; +} + +int ngtcp2_ringbuf_full(ngtcp2_ringbuf *rb) { return rb->len == rb->nmemb; } diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ringbuf.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ringbuf.h new file mode 100644 index 0000000..16635c9 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ringbuf.h @@ -0,0 +1,132 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_RINGBUF_H +#define NGTCP2_RINGBUF_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +#include "ngtcp2_mem.h" + +typedef struct ngtcp2_ringbuf { + /* buf points to the underlying buffer. */ + uint8_t *buf; + const ngtcp2_mem *mem; + /* nmemb is the number of elements that can be stored in this ring + buffer. */ + size_t nmemb; + /* size is the size of each element. */ + size_t size; + /* first is the offset to the first element. */ + size_t first; + /* len is the number of elements actually stored. */ + size_t len; +} ngtcp2_ringbuf; + +/* + * ngtcp2_ringbuf_init initializes |rb|. |nmemb| is the number of + * elements that can be stored in this buffer. |size| is the size of + * each element. |size| must be power of 2. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +int ngtcp2_ringbuf_init(ngtcp2_ringbuf *rb, size_t nmemb, size_t size, + const ngtcp2_mem *mem); + +/* + * ngtcp2_ringbuf_buf_init initializes |rb| with given buffer and + * size. + */ +void ngtcp2_ringbuf_buf_init(ngtcp2_ringbuf *rb, size_t nmemb, size_t size, + uint8_t *buf, const ngtcp2_mem *mem); + +/* + * ngtcp2_ringbuf_free frees resources allocated for |rb|. This + * function does not free the memory pointed by |rb|. + */ +void ngtcp2_ringbuf_free(ngtcp2_ringbuf *rb); + +/* ngtcp2_ringbuf_push_front moves the offset to the first element in + the buffer backward, and returns the pointer to the element. + Caller can store data to the buffer pointed by the returned + pointer. If this action exceeds the capacity of the ring buffer, + the last element is silently overwritten, and rb->len remains + unchanged. */ +void *ngtcp2_ringbuf_push_front(ngtcp2_ringbuf *rb); + +/* ngtcp2_ringbuf_push_back moves the offset to the last element in + the buffer forward, and returns the pointer to the element. Caller + can store data to the buffer pointed by the returned pointer. If + this action exceeds the capacity of the ring buffer, the first + element is silently overwritten, and rb->len remains unchanged. */ +void *ngtcp2_ringbuf_push_back(ngtcp2_ringbuf *rb); + +/* + * ngtcp2_ringbuf_pop_front removes first element in |rb|. + */ +void ngtcp2_ringbuf_pop_front(ngtcp2_ringbuf *rb); + +/* + * ngtcp2_ringbuf_pop_back removes the last element in |rb|. + */ +void ngtcp2_ringbuf_pop_back(ngtcp2_ringbuf *rb); + +/* ngtcp2_ringbuf_resize changes the number of elements stored. This + does not change the capacity of the underlying buffer. */ +void ngtcp2_ringbuf_resize(ngtcp2_ringbuf *rb, size_t len); + +/* ngtcp2_ringbuf_get returns the pointer to the element at + |offset|. */ +void *ngtcp2_ringbuf_get(ngtcp2_ringbuf *rb, size_t offset); + +/* ngtcp2_ringbuf_len returns the number of elements stored. */ +#define ngtcp2_ringbuf_len(RB) ((RB)->len) + +/* ngtcp2_ringbuf_full returns nonzero if |rb| is full. */ +int ngtcp2_ringbuf_full(ngtcp2_ringbuf *rb); + +/* ngtcp2_static_ringbuf_def defines ngtcp2_ringbuf struct wrapper + which uses a statically allocated buffer that is suitable for a + usage that does not change buffer size with ngtcp2_ringbuf_resize. + ngtcp2_ringbuf_free should never be called for rb field. */ +#define ngtcp2_static_ringbuf_def(NAME, NMEMB, SIZE) \ + typedef struct ngtcp2_static_ringbuf_##NAME { \ + ngtcp2_ringbuf rb; \ + uint8_t buf[(NMEMB) * (SIZE)]; \ + } ngtcp2_static_ringbuf_##NAME; \ + \ + static inline void ngtcp2_static_ringbuf_##NAME##_init( \ + ngtcp2_static_ringbuf_##NAME *srb) { \ + ngtcp2_ringbuf_buf_init(&srb->rb, (NMEMB), (SIZE), srb->buf, NULL); \ + } + +#endif /* NGTCP2_RINGBUF_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rob.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rob.c new file mode 100644 index 0000000..9c3d75d --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rob.c @@ -0,0 +1,319 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_rob.h" + +#include <string.h> +#include <assert.h> + +#include "ngtcp2_macro.h" + +int ngtcp2_rob_gap_new(ngtcp2_rob_gap **pg, uint64_t begin, uint64_t end, + const ngtcp2_mem *mem) { + *pg = ngtcp2_mem_malloc(mem, sizeof(ngtcp2_rob_gap)); + if (*pg == NULL) { + return NGTCP2_ERR_NOMEM; + } + + (*pg)->range.begin = begin; + (*pg)->range.end = end; + + return 0; +} + +void ngtcp2_rob_gap_del(ngtcp2_rob_gap *g, const ngtcp2_mem *mem) { + ngtcp2_mem_free(mem, g); +} + +int ngtcp2_rob_data_new(ngtcp2_rob_data **pd, uint64_t offset, size_t chunk, + const ngtcp2_mem *mem) { + *pd = ngtcp2_mem_malloc(mem, sizeof(ngtcp2_rob_data) + chunk); + if (*pd == NULL) { + return NGTCP2_ERR_NOMEM; + } + + (*pd)->range.begin = offset; + (*pd)->range.end = offset + chunk; + (*pd)->begin = (uint8_t *)(*pd) + sizeof(ngtcp2_rob_data); + (*pd)->end = (*pd)->begin + chunk; + + return 0; +} + +void ngtcp2_rob_data_del(ngtcp2_rob_data *d, const ngtcp2_mem *mem) { + ngtcp2_mem_free(mem, d); +} + +int ngtcp2_rob_init(ngtcp2_rob *rob, size_t chunk, const ngtcp2_mem *mem) { + int rv; + ngtcp2_rob_gap *g; + + ngtcp2_ksl_init(&rob->gapksl, ngtcp2_ksl_range_compar, sizeof(ngtcp2_range), + mem); + + rv = ngtcp2_rob_gap_new(&g, 0, UINT64_MAX, mem); + if (rv != 0) { + goto fail_rob_gap_new; + } + + rv = ngtcp2_ksl_insert(&rob->gapksl, NULL, &g->range, g); + if (rv != 0) { + goto fail_gapksl_ksl_insert; + } + + ngtcp2_ksl_init(&rob->dataksl, ngtcp2_ksl_range_compar, sizeof(ngtcp2_range), + mem); + + rob->chunk = chunk; + rob->mem = mem; + + return 0; + +fail_gapksl_ksl_insert: + ngtcp2_rob_gap_del(g, mem); +fail_rob_gap_new: + ngtcp2_ksl_free(&rob->gapksl); + return rv; +} + +void ngtcp2_rob_free(ngtcp2_rob *rob) { + ngtcp2_ksl_it it; + + if (rob == NULL) { + return; + } + + for (it = ngtcp2_ksl_begin(&rob->dataksl); !ngtcp2_ksl_it_end(&it); + ngtcp2_ksl_it_next(&it)) { + ngtcp2_rob_data_del(ngtcp2_ksl_it_get(&it), rob->mem); + } + + for (it = ngtcp2_ksl_begin(&rob->gapksl); !ngtcp2_ksl_it_end(&it); + ngtcp2_ksl_it_next(&it)) { + ngtcp2_rob_gap_del(ngtcp2_ksl_it_get(&it), rob->mem); + } + + ngtcp2_ksl_free(&rob->dataksl); + ngtcp2_ksl_free(&rob->gapksl); +} + +static int rob_write_data(ngtcp2_rob *rob, uint64_t offset, const uint8_t *data, + size_t len) { + size_t n; + int rv; + ngtcp2_rob_data *d; + ngtcp2_range range = {offset, offset + len}; + ngtcp2_ksl_it it; + + for (it = ngtcp2_ksl_lower_bound_compar(&rob->dataksl, &range, + ngtcp2_ksl_range_exclusive_compar); + len; ngtcp2_ksl_it_next(&it)) { + if (ngtcp2_ksl_it_end(&it)) { + d = NULL; + } else { + d = ngtcp2_ksl_it_get(&it); + } + + if (d == NULL || offset < d->range.begin) { + rv = ngtcp2_rob_data_new(&d, (offset / rob->chunk) * rob->chunk, + rob->chunk, rob->mem); + if (rv != 0) { + return rv; + } + + rv = ngtcp2_ksl_insert(&rob->dataksl, &it, &d->range, d); + if (rv != 0) { + ngtcp2_rob_data_del(d, rob->mem); + return rv; + } + } + + n = (size_t)ngtcp2_min((uint64_t)len, d->range.begin + rob->chunk - offset); + memcpy(d->begin + (offset - d->range.begin), data, n); + offset += n; + data += n; + len -= n; + } + + return 0; +} + +int ngtcp2_rob_push(ngtcp2_rob *rob, uint64_t offset, const uint8_t *data, + size_t datalen) { + int rv; + ngtcp2_rob_gap *g; + ngtcp2_range m, l, r, q = {offset, offset + datalen}; + ngtcp2_ksl_it it; + + it = ngtcp2_ksl_lower_bound_compar(&rob->gapksl, &q, + ngtcp2_ksl_range_exclusive_compar); + + for (; !ngtcp2_ksl_it_end(&it);) { + g = ngtcp2_ksl_it_get(&it); + + m = ngtcp2_range_intersect(&q, &g->range); + if (!ngtcp2_range_len(&m)) { + break; + } + if (ngtcp2_range_eq(&g->range, &m)) { + ngtcp2_ksl_remove_hint(&rob->gapksl, &it, &it, &g->range); + ngtcp2_rob_gap_del(g, rob->mem); + rv = rob_write_data(rob, m.begin, data + (m.begin - offset), + (size_t)ngtcp2_range_len(&m)); + if (rv != 0) { + return rv; + } + + continue; + } + ngtcp2_range_cut(&l, &r, &g->range, &m); + if (ngtcp2_range_len(&l)) { + ngtcp2_ksl_update_key(&rob->gapksl, &g->range, &l); + g->range = l; + + if (ngtcp2_range_len(&r)) { + ngtcp2_rob_gap *ng; + rv = ngtcp2_rob_gap_new(&ng, r.begin, r.end, rob->mem); + if (rv != 0) { + return rv; + } + rv = ngtcp2_ksl_insert(&rob->gapksl, &it, &ng->range, ng); + if (rv != 0) { + ngtcp2_rob_gap_del(ng, rob->mem); + return rv; + } + } + } else if (ngtcp2_range_len(&r)) { + ngtcp2_ksl_update_key(&rob->gapksl, &g->range, &r); + g->range = r; + } + rv = rob_write_data(rob, m.begin, data + (m.begin - offset), + (size_t)ngtcp2_range_len(&m)); + if (rv != 0) { + return rv; + } + ngtcp2_ksl_it_next(&it); + } + return 0; +} + +int ngtcp2_rob_remove_prefix(ngtcp2_rob *rob, uint64_t offset) { + ngtcp2_rob_gap *g; + ngtcp2_rob_data *d; + ngtcp2_ksl_it it; + + it = ngtcp2_ksl_begin(&rob->gapksl); + + for (; !ngtcp2_ksl_it_end(&it);) { + g = ngtcp2_ksl_it_get(&it); + if (offset <= g->range.begin) { + break; + } + if (offset < g->range.end) { + ngtcp2_range r = {offset, g->range.end}; + ngtcp2_ksl_update_key(&rob->gapksl, &g->range, &r); + g->range.begin = offset; + break; + } + ngtcp2_ksl_remove_hint(&rob->gapksl, &it, &it, &g->range); + ngtcp2_rob_gap_del(g, rob->mem); + } + + it = ngtcp2_ksl_begin(&rob->dataksl); + + for (; !ngtcp2_ksl_it_end(&it);) { + d = ngtcp2_ksl_it_get(&it); + if (offset < d->range.begin + rob->chunk) { + return 0; + } + ngtcp2_ksl_remove_hint(&rob->dataksl, &it, &it, &d->range); + ngtcp2_rob_data_del(d, rob->mem); + } + + return 0; +} + +size_t ngtcp2_rob_data_at(ngtcp2_rob *rob, const uint8_t **pdest, + uint64_t offset) { + ngtcp2_rob_gap *g; + ngtcp2_rob_data *d; + ngtcp2_ksl_it it; + + it = ngtcp2_ksl_begin(&rob->gapksl); + if (ngtcp2_ksl_it_end(&it)) { + return 0; + } + + g = ngtcp2_ksl_it_get(&it); + + if (g->range.begin <= offset) { + return 0; + } + + it = ngtcp2_ksl_begin(&rob->dataksl); + d = ngtcp2_ksl_it_get(&it); + + assert(d); + assert(d->range.begin <= offset); + assert(offset < d->range.begin + rob->chunk); + + *pdest = d->begin + (offset - d->range.begin); + + return (size_t)(ngtcp2_min(g->range.begin, d->range.begin + rob->chunk) - + offset); +} + +void ngtcp2_rob_pop(ngtcp2_rob *rob, uint64_t offset, size_t len) { + ngtcp2_ksl_it it; + ngtcp2_rob_data *d; + + it = ngtcp2_ksl_begin(&rob->dataksl); + d = ngtcp2_ksl_it_get(&it); + + assert(d); + + if (offset + len < d->range.begin + rob->chunk) { + return; + } + + ngtcp2_ksl_remove_hint(&rob->dataksl, NULL, &it, &d->range); + ngtcp2_rob_data_del(d, rob->mem); +} + +uint64_t ngtcp2_rob_first_gap_offset(ngtcp2_rob *rob) { + ngtcp2_ksl_it it = ngtcp2_ksl_begin(&rob->gapksl); + ngtcp2_rob_gap *g; + + if (ngtcp2_ksl_it_end(&it)) { + return UINT64_MAX; + } + + g = ngtcp2_ksl_it_get(&it); + + return g->range.begin; +} + +int ngtcp2_rob_data_buffered(ngtcp2_rob *rob) { + return ngtcp2_ksl_len(&rob->dataksl) != 0; +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rob.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rob.h new file mode 100644 index 0000000..c7688df --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rob.h @@ -0,0 +1,197 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_ROB_H +#define NGTCP2_ROB_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +#include "ngtcp2_mem.h" +#include "ngtcp2_range.h" +#include "ngtcp2_ksl.h" + +/* + * ngtcp2_rob_gap represents the gap, which is the range of stream + * data that is not received yet. + */ +typedef struct ngtcp2_rob_gap { + /* range is the range of this gap. */ + ngtcp2_range range; +} ngtcp2_rob_gap; + +/* + * ngtcp2_rob_gap_new allocates new ngtcp2_rob_gap object, and assigns + * its pointer to |*pg|. The caller should call ngtcp2_rob_gap_del to + * delete it when it is no longer used. The range of the gap is + * [begin, end). |mem| is custom memory allocator to allocate memory. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +int ngtcp2_rob_gap_new(ngtcp2_rob_gap **pg, uint64_t begin, uint64_t end, + const ngtcp2_mem *mem); + +/* + * ngtcp2_rob_gap_del deallocates |g|. It deallocates the memory + * pointed by |g| it self. |mem| is custom memory allocator to + * deallocate memory. + */ +void ngtcp2_rob_gap_del(ngtcp2_rob_gap *g, const ngtcp2_mem *mem); + +/* + * ngtcp2_rob_data holds the buffered stream data. + */ +typedef struct ngtcp2_rob_data { + /* range is the range of this gap. */ + ngtcp2_range range; + /* begin points to the buffer. */ + uint8_t *begin; + /* end points to the one beyond of the last byte of the buffer */ + uint8_t *end; +} ngtcp2_rob_data; + +/* + * ngtcp2_rob_data_new allocates new ngtcp2_rob_data object, and + * assigns its pointer to |*pd|. The caller should call + * ngtcp2_rob_data_del to delete it when it is no longer used. + * |offset| is the stream offset of the first byte of this data. + * |chunk| is the size of the buffer. |offset| must be multiple of + * |chunk|. |mem| is custom memory allocator to allocate memory. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +int ngtcp2_rob_data_new(ngtcp2_rob_data **pd, uint64_t offset, size_t chunk, + const ngtcp2_mem *mem); + +/* + * ngtcp2_rob_data_del deallocates |d|. It deallocates the memory + * pointed by |d| itself. |mem| is custom memory allocator to + * deallocate memory. + */ +void ngtcp2_rob_data_del(ngtcp2_rob_data *d, const ngtcp2_mem *mem); + +/* + * ngtcp2_rob is the reorder buffer which reassembles stream data + * received in out of order. + */ +typedef struct ngtcp2_rob { + /* gapksl maintains the range of offset which is not received + yet. Initially, its range is [0, UINT64_MAX). */ + ngtcp2_ksl gapksl; + /* dataksl maintains the list of buffers which store received data + ordered by stream offset. */ + ngtcp2_ksl dataksl; + /* mem is custom memory allocator */ + const ngtcp2_mem *mem; + /* chunk is the size of each buffer in data field */ + size_t chunk; +} ngtcp2_rob; + +/* + * ngtcp2_rob_init initializes |rob|. |chunk| is the size of buffer + * per chunk. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +int ngtcp2_rob_init(ngtcp2_rob *rob, size_t chunk, const ngtcp2_mem *mem); + +/* + * ngtcp2_rob_free frees resources allocated for |rob|. + */ +void ngtcp2_rob_free(ngtcp2_rob *rob); + +/* + * ngtcp2_rob_push adds new data of length |datalen| at the stream + * offset |offset|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory + */ +int ngtcp2_rob_push(ngtcp2_rob *rob, uint64_t offset, const uint8_t *data, + size_t datalen); + +/* + * ngtcp2_rob_remove_prefix removes gap up to |offset|, exclusive. It + * also removes data buffer if it is completely included in |offset|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory + */ +int ngtcp2_rob_remove_prefix(ngtcp2_rob *rob, uint64_t offset); + +/* + * ngtcp2_rob_data_at stores the pointer to the buffer of stream + * offset |offset| to |*pdest| if it is available, and returns the + * valid length of available data. If no data is available, it + * returns 0. + */ +size_t ngtcp2_rob_data_at(ngtcp2_rob *rob, const uint8_t **pdest, + uint64_t offset); + +/* + * ngtcp2_rob_pop clears data at stream offset |offset| of length + * |len|. + * + * |offset| must be the offset given in ngtcp2_rob_data_at. |len| + * must be the return value of ngtcp2_rob_data_at when |offset| is + * passed. + * + * Caller should call this function from offset 0 in non-decreasing + * order. + */ +void ngtcp2_rob_pop(ngtcp2_rob *rob, uint64_t offset, size_t len); + +/* + * ngtcp2_rob_first_gap_offset returns the offset to the first gap. + * If there is no gap, it returns UINT64_MAX. + */ +uint64_t ngtcp2_rob_first_gap_offset(ngtcp2_rob *rob); + +/* + * ngtcp2_rob_data_buffered returns nonzero if any data is buffered. + */ +int ngtcp2_rob_data_buffered(ngtcp2_rob *rob); + +#endif /* NGTCP2_ROB_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rst.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rst.c new file mode 100644 index 0000000..7b50f98 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rst.c @@ -0,0 +1,137 @@ +/* + * ngtcp2 + * + * Copyright (c) 2019 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_rst.h" + +#include <assert.h> + +#include "ngtcp2_rtb.h" +#include "ngtcp2_cc.h" +#include "ngtcp2_macro.h" + +void ngtcp2_rs_init(ngtcp2_rs *rs) { + rs->interval = UINT64_MAX; + rs->delivered = 0; + rs->prior_delivered = 0; + rs->prior_ts = 0; + rs->tx_in_flight = 0; + rs->lost = 0; + rs->prior_lost = 0; + rs->send_elapsed = 0; + rs->ack_elapsed = 0; + rs->is_app_limited = 0; +} + +void ngtcp2_rst_init(ngtcp2_rst *rst) { + ngtcp2_rs_init(&rst->rs); + ngtcp2_window_filter_init(&rst->wf, 12); + rst->delivered = 0; + rst->delivered_ts = 0; + rst->first_sent_ts = 0; + rst->app_limited = 0; + rst->next_round_delivered = 0; + rst->round_count = 0; + rst->is_cwnd_limited = 0; + rst->lost = 0; +} + +void ngtcp2_rst_on_pkt_sent(ngtcp2_rst *rst, ngtcp2_rtb_entry *ent, + const ngtcp2_conn_stat *cstat) { + if (cstat->bytes_in_flight == 0) { + rst->first_sent_ts = rst->delivered_ts = ent->ts; + } + ent->rst.first_sent_ts = rst->first_sent_ts; + ent->rst.delivered_ts = rst->delivered_ts; + ent->rst.delivered = rst->delivered; + ent->rst.is_app_limited = rst->app_limited != 0; + ent->rst.tx_in_flight = cstat->bytes_in_flight + ent->pktlen; + ent->rst.lost = rst->lost; +} + +int ngtcp2_rst_on_ack_recv(ngtcp2_rst *rst, ngtcp2_conn_stat *cstat, + uint64_t pkt_delivered) { + ngtcp2_rs *rs = &rst->rs; + uint64_t rate; + + if (rst->app_limited && rst->delivered > rst->app_limited) { + rst->app_limited = 0; + } + + if (pkt_delivered >= rst->next_round_delivered) { + rst->next_round_delivered = pkt_delivered; + ++rst->round_count; + } + + if (rs->prior_ts == 0) { + return 0; + } + + rs->interval = ngtcp2_max(rs->send_elapsed, rs->ack_elapsed); + + rs->delivered = rst->delivered - rs->prior_delivered; + rs->lost = rst->lost - rs->prior_lost; + + if (rs->interval < cstat->min_rtt) { + rs->interval = UINT64_MAX; + return 0; + } + + if (!rs->interval) { + return 0; + } + + rate = rs->delivered * NGTCP2_SECONDS / rs->interval; + + if (rate > ngtcp2_window_filter_get_best(&rst->wf) || !rst->app_limited) { + ngtcp2_window_filter_update(&rst->wf, rate, rst->round_count); + cstat->delivery_rate_sec = ngtcp2_window_filter_get_best(&rst->wf); + } + + return 0; +} + +void ngtcp2_rst_update_rate_sample(ngtcp2_rst *rst, const ngtcp2_rtb_entry *ent, + ngtcp2_tstamp ts) { + ngtcp2_rs *rs = &rst->rs; + + rst->delivered += ent->pktlen; + rst->delivered_ts = ts; + + if (ent->rst.delivered > rs->prior_delivered) { + rs->prior_delivered = ent->rst.delivered; + rs->prior_ts = ent->rst.delivered_ts; + rs->is_app_limited = ent->rst.is_app_limited; + rs->send_elapsed = ent->ts - ent->rst.first_sent_ts; + rs->ack_elapsed = rst->delivered_ts - ent->rst.delivered_ts; + rs->tx_in_flight = ent->rst.tx_in_flight; + rs->prior_lost = ent->rst.lost; + rst->first_sent_ts = ent->ts; + } +} + +void ngtcp2_rst_update_app_limited(ngtcp2_rst *rst, ngtcp2_conn_stat *cstat) { + (void)rst; + (void)cstat; + /* TODO Not implemented */ +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rst.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rst.h new file mode 100644 index 0000000..488c655 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rst.h @@ -0,0 +1,85 @@ +/* + * ngtcp2 + * + * Copyright (c) 2019 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_RST_H +#define NGTCP2_RST_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +#include "ngtcp2_window_filter.h" + +typedef struct ngtcp2_rtb_entry ngtcp2_rtb_entry; + +/** + * @struct + * + * ngtcp2_rs contains connection state for delivery rate estimation. + */ +typedef struct ngtcp2_rs { + ngtcp2_duration interval; + uint64_t delivered; + uint64_t prior_delivered; + ngtcp2_tstamp prior_ts; + uint64_t tx_in_flight; + uint64_t lost; + uint64_t prior_lost; + ngtcp2_duration send_elapsed; + ngtcp2_duration ack_elapsed; + int is_app_limited; +} ngtcp2_rs; + +void ngtcp2_rs_init(ngtcp2_rs *rs); + +/* + * ngtcp2_rst implements delivery rate estimation described in + * https://tools.ietf.org/html/draft-cheng-iccrg-delivery-rate-estimation-00 + */ +typedef struct ngtcp2_rst { + ngtcp2_rs rs; + ngtcp2_window_filter wf; + uint64_t delivered; + ngtcp2_tstamp delivered_ts; + ngtcp2_tstamp first_sent_ts; + uint64_t app_limited; + uint64_t next_round_delivered; + uint64_t round_count; + uint64_t lost; + int is_cwnd_limited; +} ngtcp2_rst; + +void ngtcp2_rst_init(ngtcp2_rst *rst); + +void ngtcp2_rst_on_pkt_sent(ngtcp2_rst *rst, ngtcp2_rtb_entry *ent, + const ngtcp2_conn_stat *cstat); +int ngtcp2_rst_on_ack_recv(ngtcp2_rst *rst, ngtcp2_conn_stat *cstat, + uint64_t pkt_delivered); +void ngtcp2_rst_update_rate_sample(ngtcp2_rst *rst, const ngtcp2_rtb_entry *ent, + ngtcp2_tstamp ts); +void ngtcp2_rst_update_app_limited(ngtcp2_rst *rst, ngtcp2_conn_stat *cstat); + +#endif /* NGTCP2_RST_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rtb.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rtb.c new file mode 100644 index 0000000..7fb0cc7 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rtb.c @@ -0,0 +1,1676 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_rtb.h" + +#include <assert.h> +#include <string.h> + +#include "ngtcp2_macro.h" +#include "ngtcp2_conn.h" +#include "ngtcp2_log.h" +#include "ngtcp2_vec.h" +#include "ngtcp2_cc.h" +#include "ngtcp2_rcvry.h" +#include "ngtcp2_rst.h" +#include "ngtcp2_unreachable.h" + +int ngtcp2_frame_chain_new(ngtcp2_frame_chain **pfrc, const ngtcp2_mem *mem) { + *pfrc = ngtcp2_mem_malloc(mem, sizeof(ngtcp2_frame_chain)); + if (*pfrc == NULL) { + return NGTCP2_ERR_NOMEM; + } + + ngtcp2_frame_chain_init(*pfrc); + + return 0; +} + +int ngtcp2_frame_chain_objalloc_new(ngtcp2_frame_chain **pfrc, + ngtcp2_objalloc *objalloc) { + *pfrc = ngtcp2_objalloc_frame_chain_get(objalloc); + if (*pfrc == NULL) { + return NGTCP2_ERR_NOMEM; + } + + ngtcp2_frame_chain_init(*pfrc); + + return 0; +} + +int ngtcp2_frame_chain_extralen_new(ngtcp2_frame_chain **pfrc, size_t extralen, + const ngtcp2_mem *mem) { + *pfrc = ngtcp2_mem_malloc(mem, sizeof(ngtcp2_frame_chain) + extralen); + if (*pfrc == NULL) { + return NGTCP2_ERR_NOMEM; + } + + ngtcp2_frame_chain_init(*pfrc); + + return 0; +} + +int ngtcp2_frame_chain_stream_datacnt_objalloc_new(ngtcp2_frame_chain **pfrc, + size_t datacnt, + ngtcp2_objalloc *objalloc, + const ngtcp2_mem *mem) { + size_t need, avail = sizeof(ngtcp2_frame) - sizeof(ngtcp2_stream); + + if (datacnt > 1) { + need = sizeof(ngtcp2_vec) * (datacnt - 1); + + if (need > avail) { + return ngtcp2_frame_chain_extralen_new(pfrc, need - avail, mem); + } + } + + return ngtcp2_frame_chain_objalloc_new(pfrc, objalloc); +} + +int ngtcp2_frame_chain_crypto_datacnt_objalloc_new(ngtcp2_frame_chain **pfrc, + size_t datacnt, + ngtcp2_objalloc *objalloc, + const ngtcp2_mem *mem) { + size_t need, avail = sizeof(ngtcp2_frame) - sizeof(ngtcp2_crypto); + + if (datacnt > 1) { + need = sizeof(ngtcp2_vec) * (datacnt - 1); + + if (need > avail) { + return ngtcp2_frame_chain_extralen_new(pfrc, need - avail, mem); + } + } + + return ngtcp2_frame_chain_objalloc_new(pfrc, objalloc); +} + +int ngtcp2_frame_chain_new_token_objalloc_new(ngtcp2_frame_chain **pfrc, + const uint8_t *token, + size_t tokenlen, + ngtcp2_objalloc *objalloc, + const ngtcp2_mem *mem) { + size_t avail = sizeof(ngtcp2_frame) - sizeof(ngtcp2_new_token); + int rv; + uint8_t *p; + ngtcp2_frame *fr; + + if (tokenlen > avail) { + rv = ngtcp2_frame_chain_extralen_new(pfrc, tokenlen - avail, mem); + } else { + rv = ngtcp2_frame_chain_objalloc_new(pfrc, objalloc); + } + if (rv != 0) { + return rv; + } + + fr = &(*pfrc)->fr; + fr->type = NGTCP2_FRAME_NEW_TOKEN; + + p = (uint8_t *)fr + sizeof(ngtcp2_new_token); + memcpy(p, token, tokenlen); + + fr->new_token.token = p; + fr->new_token.tokenlen = tokenlen; + + return 0; +} + +void ngtcp2_frame_chain_del(ngtcp2_frame_chain *frc, const ngtcp2_mem *mem) { + ngtcp2_frame_chain_binder *binder; + + if (frc == NULL) { + return; + } + + binder = frc->binder; + if (binder && --binder->refcount == 0) { + ngtcp2_mem_free(mem, binder); + } + + ngtcp2_mem_free(mem, frc); +} + +void ngtcp2_frame_chain_objalloc_del(ngtcp2_frame_chain *frc, + ngtcp2_objalloc *objalloc, + const ngtcp2_mem *mem) { + ngtcp2_frame_chain_binder *binder; + + if (frc == NULL) { + return; + } + + switch (frc->fr.type) { + case NGTCP2_FRAME_STREAM: + if (frc->fr.stream.datacnt && + sizeof(ngtcp2_vec) * (frc->fr.stream.datacnt - 1) > + sizeof(ngtcp2_frame) - sizeof(ngtcp2_stream)) { + ngtcp2_frame_chain_del(frc, mem); + + return; + } + + break; + case NGTCP2_FRAME_CRYPTO: + if (frc->fr.crypto.datacnt && + sizeof(ngtcp2_vec) * (frc->fr.crypto.datacnt - 1) > + sizeof(ngtcp2_frame) - sizeof(ngtcp2_crypto)) { + ngtcp2_frame_chain_del(frc, mem); + + return; + } + + break; + case NGTCP2_FRAME_NEW_TOKEN: + if (frc->fr.new_token.tokenlen > + sizeof(ngtcp2_frame) - sizeof(ngtcp2_new_token)) { + ngtcp2_frame_chain_del(frc, mem); + + return; + } + + break; + } + + binder = frc->binder; + if (binder && --binder->refcount == 0) { + ngtcp2_mem_free(mem, binder); + } + + frc->binder = NULL; + + ngtcp2_objalloc_frame_chain_release(objalloc, frc); +} + +void ngtcp2_frame_chain_init(ngtcp2_frame_chain *frc) { + frc->next = NULL; + frc->binder = NULL; +} + +void ngtcp2_frame_chain_list_objalloc_del(ngtcp2_frame_chain *frc, + ngtcp2_objalloc *objalloc, + const ngtcp2_mem *mem) { + ngtcp2_frame_chain *next; + + for (; frc; frc = next) { + next = frc->next; + + ngtcp2_frame_chain_objalloc_del(frc, objalloc, mem); + } +} + +int ngtcp2_frame_chain_binder_new(ngtcp2_frame_chain_binder **pbinder, + const ngtcp2_mem *mem) { + *pbinder = ngtcp2_mem_calloc(mem, 1, sizeof(ngtcp2_frame_chain_binder)); + if (*pbinder == NULL) { + return NGTCP2_ERR_NOMEM; + } + + return 0; +} + +int ngtcp2_bind_frame_chains(ngtcp2_frame_chain *a, ngtcp2_frame_chain *b, + const ngtcp2_mem *mem) { + ngtcp2_frame_chain_binder *binder; + int rv; + + assert(b->binder == NULL); + + if (a->binder == NULL) { + rv = ngtcp2_frame_chain_binder_new(&binder, mem); + if (rv != 0) { + return rv; + } + + a->binder = binder; + ++a->binder->refcount; + } + + b->binder = a->binder; + ++b->binder->refcount; + + return 0; +} + +static void rtb_entry_init(ngtcp2_rtb_entry *ent, const ngtcp2_pkt_hd *hd, + ngtcp2_frame_chain *frc, ngtcp2_tstamp ts, + size_t pktlen, uint16_t flags) { + memset(ent, 0, sizeof(*ent)); + + ent->hd.pkt_num = hd->pkt_num; + ent->hd.type = hd->type; + ent->hd.flags = hd->flags; + ent->frc = frc; + ent->ts = ts; + ent->lost_ts = UINT64_MAX; + ent->pktlen = pktlen; + ent->flags = flags; + ent->next = NULL; +} + +int ngtcp2_rtb_entry_objalloc_new(ngtcp2_rtb_entry **pent, + const ngtcp2_pkt_hd *hd, + ngtcp2_frame_chain *frc, ngtcp2_tstamp ts, + size_t pktlen, uint16_t flags, + ngtcp2_objalloc *objalloc) { + *pent = ngtcp2_objalloc_rtb_entry_get(objalloc); + if (*pent == NULL) { + return NGTCP2_ERR_NOMEM; + } + + rtb_entry_init(*pent, hd, frc, ts, pktlen, flags); + + return 0; +} + +void ngtcp2_rtb_entry_objalloc_del(ngtcp2_rtb_entry *ent, + ngtcp2_objalloc *objalloc, + ngtcp2_objalloc *frc_objalloc, + const ngtcp2_mem *mem) { + ngtcp2_frame_chain_list_objalloc_del(ent->frc, frc_objalloc, mem); + + ent->frc = NULL; + + ngtcp2_objalloc_rtb_entry_release(objalloc, ent); +} + +static int greater(const ngtcp2_ksl_key *lhs, const ngtcp2_ksl_key *rhs) { + return *(int64_t *)lhs > *(int64_t *)rhs; +} + +void ngtcp2_rtb_init(ngtcp2_rtb *rtb, ngtcp2_pktns_id pktns_id, + ngtcp2_strm *crypto, ngtcp2_rst *rst, ngtcp2_cc *cc, + ngtcp2_log *log, ngtcp2_qlog *qlog, + ngtcp2_objalloc *rtb_entry_objalloc, + ngtcp2_objalloc *frc_objalloc, const ngtcp2_mem *mem) { + rtb->rtb_entry_objalloc = rtb_entry_objalloc; + rtb->frc_objalloc = frc_objalloc; + ngtcp2_ksl_init(&rtb->ents, greater, sizeof(int64_t), mem); + rtb->crypto = crypto; + rtb->rst = rst; + rtb->cc = cc; + rtb->log = log; + rtb->qlog = qlog; + rtb->mem = mem; + rtb->largest_acked_tx_pkt_num = -1; + rtb->num_ack_eliciting = 0; + rtb->num_retransmittable = 0; + rtb->num_pto_eliciting = 0; + rtb->probe_pkt_left = 0; + rtb->pktns_id = pktns_id; + rtb->cc_pkt_num = 0; + rtb->cc_bytes_in_flight = 0; + rtb->persistent_congestion_start_ts = UINT64_MAX; + rtb->num_lost_pkts = 0; + rtb->num_lost_pmtud_pkts = 0; +} + +void ngtcp2_rtb_free(ngtcp2_rtb *rtb) { + ngtcp2_ksl_it it; + + if (rtb == NULL) { + return; + } + + it = ngtcp2_ksl_begin(&rtb->ents); + + for (; !ngtcp2_ksl_it_end(&it); ngtcp2_ksl_it_next(&it)) { + ngtcp2_rtb_entry_objalloc_del(ngtcp2_ksl_it_get(&it), + rtb->rtb_entry_objalloc, rtb->frc_objalloc, + rtb->mem); + } + + ngtcp2_ksl_free(&rtb->ents); +} + +static void rtb_on_add(ngtcp2_rtb *rtb, ngtcp2_rtb_entry *ent, + ngtcp2_conn_stat *cstat) { + ngtcp2_rst_on_pkt_sent(rtb->rst, ent, cstat); + + assert(rtb->cc_pkt_num <= ent->hd.pkt_num); + + cstat->bytes_in_flight += ent->pktlen; + rtb->cc_bytes_in_flight += ent->pktlen; + + ngtcp2_rst_update_app_limited(rtb->rst, cstat); + + if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) { + ++rtb->num_ack_eliciting; + } + if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE) { + ++rtb->num_retransmittable; + } + if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING) { + ++rtb->num_pto_eliciting; + } +} + +static size_t rtb_on_remove(ngtcp2_rtb *rtb, ngtcp2_rtb_entry *ent, + ngtcp2_conn_stat *cstat) { + if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED) { + assert(rtb->num_lost_pkts); + --rtb->num_lost_pkts; + + if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_PMTUD_PROBE) { + assert(rtb->num_lost_pmtud_pkts); + --rtb->num_lost_pmtud_pkts; + } + + return 0; + } + + if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) { + assert(rtb->num_ack_eliciting); + --rtb->num_ack_eliciting; + } + + if ((ent->flags & NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE) && + !(ent->flags & NGTCP2_RTB_ENTRY_FLAG_PTO_RECLAIMED)) { + assert(rtb->num_retransmittable); + --rtb->num_retransmittable; + } + + if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING) { + assert(rtb->num_pto_eliciting); + --rtb->num_pto_eliciting; + } + + if (rtb->cc_pkt_num <= ent->hd.pkt_num) { + assert(cstat->bytes_in_flight >= ent->pktlen); + cstat->bytes_in_flight -= ent->pktlen; + + assert(rtb->cc_bytes_in_flight >= ent->pktlen); + rtb->cc_bytes_in_flight -= ent->pktlen; + + /* If PMTUD packet is lost, we do not report the lost bytes to the + caller in order to ignore loss of PMTUD packet. */ + if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_PMTUD_PROBE) { + return 0; + } + + return ent->pktlen; + } + + return 0; +} + +/* NGTCP2_RECLAIM_FLAG_NONE indicates that no flag is set. */ +#define NGTCP2_RECLAIM_FLAG_NONE 0x00u +/* NGTCP2_RECLAIM_FLAG_ON_LOSS indicates that frames are reclaimed + because of the packet loss.*/ +#define NGTCP2_RECLAIM_FLAG_ON_LOSS 0x01u + +/* + * rtb_reclaim_frame queues unacknowledged frames included in |ent| + * for retransmission. The re-queued frames are not deleted from + * |ent|. It returns the number of frames queued. |flags| is bitwise + * OR of 0 or more of NGTCP2_RECLAIM_FLAG_*. + */ +static ngtcp2_ssize rtb_reclaim_frame(ngtcp2_rtb *rtb, uint8_t flags, + ngtcp2_conn *conn, ngtcp2_pktns *pktns, + ngtcp2_rtb_entry *ent) { + ngtcp2_frame_chain *frc, *nfrc, **pfrc = &pktns->tx.frq; + ngtcp2_frame *fr; + ngtcp2_strm *strm; + ngtcp2_range gap, range; + size_t num_reclaimed = 0; + int rv; + int streamfrq_empty; + + assert(ent->flags & NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE); + + /* TODO Reconsider the order of pfrc */ + for (frc = ent->frc; frc; frc = frc->next) { + fr = &frc->fr; + /* Check that a late ACK acknowledged this frame. */ + if (frc->binder && + (frc->binder->flags & NGTCP2_FRAME_CHAIN_BINDER_FLAG_ACK)) { + continue; + } + switch (frc->fr.type) { + case NGTCP2_FRAME_STREAM: + strm = ngtcp2_conn_find_stream(conn, fr->stream.stream_id); + if (strm == NULL) { + continue; + } + + gap = ngtcp2_strm_get_unacked_range_after(strm, fr->stream.offset); + + range.begin = fr->stream.offset; + range.end = fr->stream.offset + + ngtcp2_vec_len(fr->stream.data, fr->stream.datacnt); + range = ngtcp2_range_intersect(&range, &gap); + if (ngtcp2_range_len(&range) == 0) { + if (!fr->stream.fin) { + /* 0 length STREAM frame with offset == 0 must be + retransmitted if no non-empty data is sent to this stream + and no data in this stream is acknowledged. */ + if (fr->stream.offset != 0 || fr->stream.datacnt != 0 || + strm->tx.offset || (strm->flags & NGTCP2_STRM_FLAG_ANY_ACKED)) { + continue; + } + } else if (strm->flags & NGTCP2_STRM_FLAG_FIN_ACKED) { + continue; + } + } + + if ((flags & NGTCP2_RECLAIM_FLAG_ON_LOSS) && + ent->hd.pkt_num != strm->tx.last_lost_pkt_num) { + strm->tx.last_lost_pkt_num = ent->hd.pkt_num; + ++strm->tx.loss_count; + } + + rv = ngtcp2_frame_chain_stream_datacnt_objalloc_new( + &nfrc, fr->stream.datacnt, rtb->frc_objalloc, rtb->mem); + if (rv != 0) { + return rv; + } + + nfrc->fr = *fr; + ngtcp2_vec_copy(nfrc->fr.stream.data, fr->stream.data, + fr->stream.datacnt); + + streamfrq_empty = ngtcp2_strm_streamfrq_empty(strm); + rv = ngtcp2_strm_streamfrq_push(strm, nfrc); + if (rv != 0) { + ngtcp2_frame_chain_objalloc_del(nfrc, rtb->frc_objalloc, rtb->mem); + return rv; + } + if (!ngtcp2_strm_is_tx_queued(strm)) { + strm->cycle = ngtcp2_conn_tx_strmq_first_cycle(conn); + rv = ngtcp2_conn_tx_strmq_push(conn, strm); + if (rv != 0) { + return rv; + } + } + if (streamfrq_empty) { + ++conn->tx.strmq_nretrans; + } + + ++num_reclaimed; + + continue; + case NGTCP2_FRAME_CRYPTO: + /* Don't resend CRYPTO frame if the whole region it contains has + been acknowledged */ + gap = ngtcp2_strm_get_unacked_range_after(rtb->crypto, fr->crypto.offset); + + range.begin = fr->crypto.offset; + range.end = fr->crypto.offset + + ngtcp2_vec_len(fr->crypto.data, fr->crypto.datacnt); + range = ngtcp2_range_intersect(&range, &gap); + if (ngtcp2_range_len(&range) == 0) { + continue; + } + + rv = ngtcp2_frame_chain_crypto_datacnt_objalloc_new( + &nfrc, fr->crypto.datacnt, rtb->frc_objalloc, rtb->mem); + if (rv != 0) { + return rv; + } + + nfrc->fr = *fr; + ngtcp2_vec_copy(nfrc->fr.crypto.data, fr->crypto.data, + fr->crypto.datacnt); + + rv = ngtcp2_ksl_insert(&pktns->crypto.tx.frq, NULL, + &nfrc->fr.crypto.offset, nfrc); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + ngtcp2_frame_chain_objalloc_del(nfrc, rtb->frc_objalloc, rtb->mem); + return rv; + } + + ++num_reclaimed; + + continue; + case NGTCP2_FRAME_NEW_TOKEN: + rv = ngtcp2_frame_chain_new_token_objalloc_new( + &nfrc, fr->new_token.token, fr->new_token.tokenlen, rtb->frc_objalloc, + rtb->mem); + if (rv != 0) { + return rv; + } + + rv = ngtcp2_bind_frame_chains(frc, nfrc, rtb->mem); + if (rv != 0) { + return rv; + } + + break; + case NGTCP2_FRAME_DATAGRAM: + case NGTCP2_FRAME_DATAGRAM_LEN: + continue; + default: + rv = ngtcp2_frame_chain_objalloc_new(&nfrc, rtb->frc_objalloc); + if (rv != 0) { + return rv; + } + + nfrc->fr = *fr; + + rv = ngtcp2_bind_frame_chains(frc, nfrc, rtb->mem); + if (rv != 0) { + return rv; + } + + break; + } + + ++num_reclaimed; + + nfrc->next = *pfrc; + *pfrc = nfrc; + pfrc = &nfrc->next; + } + + return (ngtcp2_ssize)num_reclaimed; +} + +/* + * conn_process_lost_datagram calls ngtcp2_lost_datagram callback for + * lost DATAGRAM frames. + */ +static int conn_process_lost_datagram(ngtcp2_conn *conn, + ngtcp2_rtb_entry *ent) { + ngtcp2_frame_chain *frc; + int rv; + + for (frc = ent->frc; frc; frc = frc->next) { + switch (frc->fr.type) { + case NGTCP2_FRAME_DATAGRAM: + case NGTCP2_FRAME_DATAGRAM_LEN: + assert(conn->callbacks.lost_datagram); + + rv = conn->callbacks.lost_datagram(conn, frc->fr.datagram.dgram_id, + conn->user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + break; + } + } + + return 0; +} + +static int rtb_on_pkt_lost(ngtcp2_rtb *rtb, ngtcp2_ksl_it *it, + ngtcp2_rtb_entry *ent, ngtcp2_conn_stat *cstat, + ngtcp2_conn *conn, ngtcp2_pktns *pktns, + ngtcp2_tstamp ts) { + int rv; + ngtcp2_ssize reclaimed; + ngtcp2_cc *cc = rtb->cc; + ngtcp2_cc_pkt pkt; + + ngtcp2_log_pkt_lost(rtb->log, ent->hd.pkt_num, ent->hd.type, ent->hd.flags, + ent->ts); + + if (rtb->qlog) { + ngtcp2_qlog_pkt_lost(rtb->qlog, ent); + } + + if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_PMTUD_PROBE) { + ++rtb->num_lost_pmtud_pkts; + } else if (rtb->cc->on_pkt_lost) { + cc->on_pkt_lost(cc, cstat, + ngtcp2_cc_pkt_init(&pkt, ent->hd.pkt_num, ent->pktlen, + rtb->pktns_id, ent->ts, ent->rst.lost, + ent->rst.tx_in_flight, + ent->rst.is_app_limited), + ts); + } + + if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_PTO_RECLAIMED) { + ngtcp2_log_info(rtb->log, NGTCP2_LOG_EVENT_RCV, + "pkn=%" PRId64 " has already been reclaimed on PTO", + ent->hd.pkt_num); + assert(!(ent->flags & NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED)); + assert(UINT64_MAX == ent->lost_ts); + + ent->flags |= NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED; + ent->lost_ts = ts; + + ++rtb->num_lost_pkts; + + ngtcp2_ksl_it_next(it); + + return 0; + } + + if (conn->callbacks.lost_datagram && + (ent->flags & NGTCP2_RTB_ENTRY_FLAG_DATAGRAM)) { + rv = conn_process_lost_datagram(conn, ent); + if (rv != 0) { + return rv; + } + } + + if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE) { + assert(ent->frc); + assert(!(ent->flags & NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED)); + assert(UINT64_MAX == ent->lost_ts); + + reclaimed = + rtb_reclaim_frame(rtb, NGTCP2_RECLAIM_FLAG_ON_LOSS, conn, pktns, ent); + if (reclaimed < 0) { + return (int)reclaimed; + } + } + + ent->flags |= NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED; + ent->lost_ts = ts; + + ++rtb->num_lost_pkts; + + ngtcp2_ksl_it_next(it); + + return 0; +} + +int ngtcp2_rtb_add(ngtcp2_rtb *rtb, ngtcp2_rtb_entry *ent, + ngtcp2_conn_stat *cstat) { + int rv; + + rv = ngtcp2_ksl_insert(&rtb->ents, NULL, &ent->hd.pkt_num, ent); + if (rv != 0) { + return rv; + } + + rtb_on_add(rtb, ent, cstat); + + return 0; +} + +ngtcp2_ksl_it ngtcp2_rtb_head(ngtcp2_rtb *rtb) { + return ngtcp2_ksl_begin(&rtb->ents); +} + +static void rtb_remove(ngtcp2_rtb *rtb, ngtcp2_ksl_it *it, + ngtcp2_rtb_entry **pent, ngtcp2_rtb_entry *ent, + ngtcp2_conn_stat *cstat) { + int rv; + (void)rv; + + rv = ngtcp2_ksl_remove_hint(&rtb->ents, it, it, &ent->hd.pkt_num); + assert(0 == rv); + rtb_on_remove(rtb, ent, cstat); + + assert(ent->next == NULL); + + ngtcp2_list_insert(ent, pent); +} + +static void conn_ack_crypto_data(ngtcp2_conn *conn, ngtcp2_pktns *pktns, + uint64_t datalen) { + ngtcp2_buf_chain **pbufchain, *bufchain; + size_t left; + + for (pbufchain = &pktns->crypto.tx.data; *pbufchain;) { + left = ngtcp2_buf_len(&(*pbufchain)->buf); + if (left > datalen) { + (*pbufchain)->buf.pos += datalen; + return; + } + + bufchain = *pbufchain; + *pbufchain = bufchain->next; + + ngtcp2_mem_free(conn->mem, bufchain); + + datalen -= left; + + if (datalen == 0) { + return; + } + } + + assert(datalen == 0); + + return; +} + +static int rtb_process_acked_pkt(ngtcp2_rtb *rtb, ngtcp2_rtb_entry *ent, + ngtcp2_conn *conn) { + ngtcp2_frame_chain *frc; + uint64_t prev_stream_offset, stream_offset; + ngtcp2_strm *strm; + int rv; + uint64_t datalen; + ngtcp2_strm *crypto = rtb->crypto; + ngtcp2_pktns *pktns = NULL; + + if ((ent->flags & NGTCP2_RTB_ENTRY_FLAG_PMTUD_PROBE) && conn->pmtud && + conn->pmtud->tx_pkt_num <= ent->hd.pkt_num) { + ngtcp2_pmtud_probe_success(conn->pmtud, ent->pktlen); + + conn->dcid.current.max_udp_payload_size = + ngtcp2_max(conn->dcid.current.max_udp_payload_size, ent->pktlen); + + if (ngtcp2_pmtud_finished(conn->pmtud)) { + ngtcp2_conn_stop_pmtud(conn); + } + } + + for (frc = ent->frc; frc; frc = frc->next) { + if (frc->binder) { + frc->binder->flags |= NGTCP2_FRAME_CHAIN_BINDER_FLAG_ACK; + } + + switch (frc->fr.type) { + case NGTCP2_FRAME_STREAM: + strm = ngtcp2_conn_find_stream(conn, frc->fr.stream.stream_id); + if (strm == NULL) { + break; + } + + strm->flags |= NGTCP2_STRM_FLAG_ANY_ACKED; + + if (frc->fr.stream.fin) { + strm->flags |= NGTCP2_STRM_FLAG_FIN_ACKED; + } + + prev_stream_offset = ngtcp2_strm_get_acked_offset(strm); + rv = ngtcp2_strm_ack_data( + strm, frc->fr.stream.offset, + ngtcp2_vec_len(frc->fr.stream.data, frc->fr.stream.datacnt)); + if (rv != 0) { + return rv; + } + + if (conn->callbacks.acked_stream_data_offset) { + stream_offset = ngtcp2_strm_get_acked_offset(strm); + datalen = stream_offset - prev_stream_offset; + if (datalen == 0 && !frc->fr.stream.fin) { + break; + } + + rv = conn->callbacks.acked_stream_data_offset( + conn, strm->stream_id, prev_stream_offset, datalen, conn->user_data, + strm->stream_user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + } + + rv = ngtcp2_conn_close_stream_if_shut_rdwr(conn, strm); + if (rv != 0) { + return rv; + } + break; + case NGTCP2_FRAME_CRYPTO: + prev_stream_offset = ngtcp2_strm_get_acked_offset(crypto); + rv = ngtcp2_strm_ack_data( + crypto, frc->fr.crypto.offset, + ngtcp2_vec_len(frc->fr.crypto.data, frc->fr.crypto.datacnt)); + if (rv != 0) { + return rv; + } + + stream_offset = ngtcp2_strm_get_acked_offset(crypto); + datalen = stream_offset - prev_stream_offset; + if (datalen == 0) { + break; + } + + switch (rtb->pktns_id) { + case NGTCP2_PKTNS_ID_INITIAL: + pktns = conn->in_pktns; + break; + case NGTCP2_PKTNS_ID_HANDSHAKE: + pktns = conn->hs_pktns; + break; + case NGTCP2_PKTNS_ID_APPLICATION: + pktns = &conn->pktns; + break; + default: + ngtcp2_unreachable(); + } + + conn_ack_crypto_data(conn, pktns, datalen); + + break; + case NGTCP2_FRAME_RESET_STREAM: + strm = ngtcp2_conn_find_stream(conn, frc->fr.reset_stream.stream_id); + if (strm == NULL) { + break; + } + strm->flags |= NGTCP2_STRM_FLAG_RST_ACKED; + rv = ngtcp2_conn_close_stream_if_shut_rdwr(conn, strm); + if (rv != 0) { + return rv; + } + break; + case NGTCP2_FRAME_RETIRE_CONNECTION_ID: + ngtcp2_conn_untrack_retired_dcid_seq(conn, + frc->fr.retire_connection_id.seq); + break; + case NGTCP2_FRAME_DATAGRAM: + case NGTCP2_FRAME_DATAGRAM_LEN: + if (!conn->callbacks.ack_datagram) { + break; + } + + rv = conn->callbacks.ack_datagram(conn, frc->fr.datagram.dgram_id, + conn->user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + break; + } + } + return 0; +} + +static void rtb_on_pkt_acked(ngtcp2_rtb *rtb, ngtcp2_rtb_entry *ent, + ngtcp2_conn_stat *cstat, ngtcp2_tstamp ts) { + ngtcp2_cc *cc = rtb->cc; + ngtcp2_cc_pkt pkt; + + ngtcp2_rst_update_rate_sample(rtb->rst, ent, ts); + + cc->on_pkt_acked(cc, cstat, + ngtcp2_cc_pkt_init(&pkt, ent->hd.pkt_num, ent->pktlen, + rtb->pktns_id, ent->ts, ent->rst.lost, + ent->rst.tx_in_flight, + ent->rst.is_app_limited), + ts); + + if (!(ent->flags & NGTCP2_RTB_ENTRY_FLAG_PROBE) && + (ent->flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING)) { + cstat->pto_count = 0; + } +} + +static void conn_verify_ecn(ngtcp2_conn *conn, ngtcp2_pktns *pktns, + ngtcp2_cc *cc, ngtcp2_conn_stat *cstat, + const ngtcp2_ack *fr, size_t ecn_acked, + ngtcp2_tstamp largest_acked_sent_ts, + ngtcp2_tstamp ts) { + if (conn->tx.ecn.state == NGTCP2_ECN_STATE_FAILED) { + return; + } + + if ((ecn_acked && fr->type == NGTCP2_FRAME_ACK) || + (fr->type == NGTCP2_FRAME_ACK_ECN && + (pktns->rx.ecn.ack.ect0 > fr->ecn.ect0 || + pktns->rx.ecn.ack.ect1 > fr->ecn.ect1 || + pktns->rx.ecn.ack.ce > fr->ecn.ce || + (fr->ecn.ect0 - pktns->rx.ecn.ack.ect0) + + (fr->ecn.ce - pktns->rx.ecn.ack.ce) < + ecn_acked || + fr->ecn.ect0 > pktns->tx.ecn.ect0 || fr->ecn.ect1))) { + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, + "path is not ECN capable"); + conn->tx.ecn.state = NGTCP2_ECN_STATE_FAILED; + return; + } + + if (conn->tx.ecn.state != NGTCP2_ECN_STATE_CAPABLE && ecn_acked) { + ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, "path is ECN capable"); + conn->tx.ecn.state = NGTCP2_ECN_STATE_CAPABLE; + } + + if (fr->type == NGTCP2_FRAME_ACK_ECN) { + if (largest_acked_sent_ts != UINT64_MAX && + fr->ecn.ce > pktns->rx.ecn.ack.ce) { + cc->congestion_event(cc, cstat, largest_acked_sent_ts, ts); + } + + pktns->rx.ecn.ack.ect0 = fr->ecn.ect0; + pktns->rx.ecn.ack.ect1 = fr->ecn.ect1; + pktns->rx.ecn.ack.ce = fr->ecn.ce; + } +} + +static int rtb_detect_lost_pkt(ngtcp2_rtb *rtb, uint64_t *ppkt_lost, + ngtcp2_conn *conn, ngtcp2_pktns *pktns, + ngtcp2_conn_stat *cstat, ngtcp2_tstamp ts); + +ngtcp2_ssize ngtcp2_rtb_recv_ack(ngtcp2_rtb *rtb, const ngtcp2_ack *fr, + ngtcp2_conn_stat *cstat, ngtcp2_conn *conn, + ngtcp2_pktns *pktns, ngtcp2_tstamp pkt_ts, + ngtcp2_tstamp ts) { + ngtcp2_rtb_entry *ent; + int64_t largest_ack = fr->largest_ack, min_ack; + size_t i; + int rv; + ngtcp2_ksl_it it; + ngtcp2_ssize num_acked = 0; + ngtcp2_tstamp largest_pkt_sent_ts = UINT64_MAX; + ngtcp2_tstamp largest_acked_sent_ts = UINT64_MAX; + int64_t pkt_num; + ngtcp2_cc *cc = rtb->cc; + ngtcp2_rtb_entry *acked_ent = NULL; + int ack_eliciting_pkt_acked = 0; + size_t ecn_acked = 0; + int verify_ecn = 0; + ngtcp2_cc_ack cc_ack = {0}; + size_t num_lost_pkts = rtb->num_lost_pkts - rtb->num_lost_pmtud_pkts; + + cc_ack.prior_bytes_in_flight = cstat->bytes_in_flight; + cc_ack.rtt = UINT64_MAX; + + if (conn && (conn->flags & NGTCP2_CONN_FLAG_KEY_UPDATE_NOT_CONFIRMED) && + (conn->flags & NGTCP2_CONN_FLAG_KEY_UPDATE_INITIATOR) && + largest_ack >= conn->pktns.crypto.tx.ckm->pkt_num) { + conn->flags &= (uint32_t) ~(NGTCP2_CONN_FLAG_KEY_UPDATE_NOT_CONFIRMED | + NGTCP2_CONN_FLAG_KEY_UPDATE_INITIATOR); + conn->crypto.key_update.confirmed_ts = ts; + + ngtcp2_log_info(rtb->log, NGTCP2_LOG_EVENT_CRY, "key update confirmed"); + } + + if (rtb->largest_acked_tx_pkt_num < largest_ack) { + rtb->largest_acked_tx_pkt_num = largest_ack; + verify_ecn = 1; + } + + /* Assume that ngtcp2_pkt_validate_ack(fr) returns 0 */ + it = ngtcp2_ksl_lower_bound(&rtb->ents, &largest_ack); + if (ngtcp2_ksl_it_end(&it)) { + if (conn && verify_ecn) { + conn_verify_ecn(conn, pktns, rtb->cc, cstat, fr, ecn_acked, + largest_acked_sent_ts, ts); + } + return 0; + } + + min_ack = largest_ack - (int64_t)fr->first_ack_range; + + for (; !ngtcp2_ksl_it_end(&it);) { + pkt_num = *(int64_t *)ngtcp2_ksl_it_key(&it); + + assert(pkt_num <= largest_ack); + + if (pkt_num < min_ack) { + break; + } + + ent = ngtcp2_ksl_it_get(&it); + + if (largest_ack == pkt_num) { + largest_pkt_sent_ts = ent->ts; + } + + if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) { + ack_eliciting_pkt_acked = 1; + } + + rtb_remove(rtb, &it, &acked_ent, ent, cstat); + ++num_acked; + } + + for (i = 0; i < fr->rangecnt;) { + largest_ack = min_ack - (int64_t)fr->ranges[i].gap - 2; + min_ack = largest_ack - (int64_t)fr->ranges[i].len; + + it = ngtcp2_ksl_lower_bound(&rtb->ents, &largest_ack); + if (ngtcp2_ksl_it_end(&it)) { + break; + } + + for (; !ngtcp2_ksl_it_end(&it);) { + pkt_num = *(int64_t *)ngtcp2_ksl_it_key(&it); + if (pkt_num < min_ack) { + break; + } + ent = ngtcp2_ksl_it_get(&it); + + if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) { + ack_eliciting_pkt_acked = 1; + } + + rtb_remove(rtb, &it, &acked_ent, ent, cstat); + ++num_acked; + } + + ++i; + } + + if (largest_pkt_sent_ts != UINT64_MAX && ack_eliciting_pkt_acked) { + cc_ack.rtt = pkt_ts - largest_pkt_sent_ts; + + rv = ngtcp2_conn_update_rtt(conn, cc_ack.rtt, fr->ack_delay_unscaled, ts); + if (rv == 0 && cc->new_rtt_sample) { + cc->new_rtt_sample(cc, cstat, ts); + } + } + + if (conn) { + for (ent = acked_ent; ent; ent = acked_ent) { + if (ent->hd.pkt_num >= pktns->tx.ecn.start_pkt_num && + (ent->flags & NGTCP2_RTB_ENTRY_FLAG_ECN)) { + ++ecn_acked; + } + + assert(largest_acked_sent_ts == UINT64_MAX || + largest_acked_sent_ts <= ent->ts); + + largest_acked_sent_ts = ent->ts; + + rv = rtb_process_acked_pkt(rtb, ent, conn); + if (rv != 0) { + goto fail; + } + + if (ent->hd.pkt_num >= rtb->cc_pkt_num) { + assert(cc_ack.pkt_delivered <= ent->rst.delivered); + + cc_ack.bytes_delivered += ent->pktlen; + cc_ack.pkt_delivered = ent->rst.delivered; + } + + rtb_on_pkt_acked(rtb, ent, cstat, ts); + acked_ent = ent->next; + ngtcp2_rtb_entry_objalloc_del(ent, rtb->rtb_entry_objalloc, + rtb->frc_objalloc, rtb->mem); + } + + if (verify_ecn) { + conn_verify_ecn(conn, pktns, rtb->cc, cstat, fr, ecn_acked, + largest_acked_sent_ts, ts); + } + } else { + /* For unit tests */ + for (ent = acked_ent; ent; ent = acked_ent) { + rtb_on_pkt_acked(rtb, ent, cstat, ts); + acked_ent = ent->next; + ngtcp2_rtb_entry_objalloc_del(ent, rtb->rtb_entry_objalloc, + rtb->frc_objalloc, rtb->mem); + } + } + + if (rtb->cc->on_spurious_congestion && num_lost_pkts && + rtb->num_lost_pkts - rtb->num_lost_pmtud_pkts == 0) { + rtb->cc->on_spurious_congestion(cc, cstat, ts); + } + + ngtcp2_rst_on_ack_recv(rtb->rst, cstat, cc_ack.pkt_delivered); + + if (conn && num_acked > 0) { + rv = rtb_detect_lost_pkt(rtb, &cc_ack.bytes_lost, conn, pktns, cstat, ts); + if (rv != 0) { + return rv; + } + } + + rtb->rst->lost += cc_ack.bytes_lost; + + cc_ack.largest_acked_sent_ts = largest_acked_sent_ts; + cc->on_ack_recv(cc, cstat, &cc_ack, ts); + + return num_acked; + +fail: + for (ent = acked_ent; ent; ent = acked_ent) { + acked_ent = ent->next; + ngtcp2_rtb_entry_objalloc_del(ent, rtb->rtb_entry_objalloc, + rtb->frc_objalloc, rtb->mem); + } + + return rv; +} + +static int rtb_pkt_lost(ngtcp2_rtb *rtb, ngtcp2_conn_stat *cstat, + const ngtcp2_rtb_entry *ent, ngtcp2_duration loss_delay, + size_t pkt_thres, ngtcp2_tstamp ts) { + ngtcp2_tstamp loss_time; + + if (ent->ts + loss_delay <= ts || + rtb->largest_acked_tx_pkt_num >= ent->hd.pkt_num + (int64_t)pkt_thres) { + return 1; + } + + loss_time = cstat->loss_time[rtb->pktns_id]; + + if (loss_time == UINT64_MAX) { + loss_time = ent->ts + loss_delay; + } else { + loss_time = ngtcp2_min(loss_time, ent->ts + loss_delay); + } + + cstat->loss_time[rtb->pktns_id] = loss_time; + + return 0; +} + +/* + * rtb_compute_pkt_loss_delay computes loss delay. + */ +static ngtcp2_duration compute_pkt_loss_delay(const ngtcp2_conn_stat *cstat) { + /* 9/8 is kTimeThreshold */ + ngtcp2_duration loss_delay = + ngtcp2_max(cstat->latest_rtt, cstat->smoothed_rtt) * 9 / 8; + return ngtcp2_max(loss_delay, NGTCP2_GRANULARITY); +} + +/* + * conn_all_ecn_pkt_lost returns nonzero if all ECN QUIC packets are + * lost during validation period. + */ +static int conn_all_ecn_pkt_lost(ngtcp2_conn *conn) { + ngtcp2_pktns *in_pktns = conn->in_pktns; + ngtcp2_pktns *hs_pktns = conn->hs_pktns; + ngtcp2_pktns *pktns = &conn->pktns; + + return (!in_pktns || in_pktns->tx.ecn.validation_pkt_sent == + in_pktns->tx.ecn.validation_pkt_lost) && + (!hs_pktns || hs_pktns->tx.ecn.validation_pkt_sent == + hs_pktns->tx.ecn.validation_pkt_lost) && + pktns->tx.ecn.validation_pkt_sent == pktns->tx.ecn.validation_pkt_lost; +} + +static int rtb_detect_lost_pkt(ngtcp2_rtb *rtb, uint64_t *ppkt_lost, + ngtcp2_conn *conn, ngtcp2_pktns *pktns, + ngtcp2_conn_stat *cstat, ngtcp2_tstamp ts) { + ngtcp2_rtb_entry *ent; + ngtcp2_duration loss_delay; + ngtcp2_ksl_it it; + ngtcp2_tstamp latest_ts, oldest_ts; + int64_t last_lost_pkt_num; + ngtcp2_duration loss_window, congestion_period; + ngtcp2_cc *cc = rtb->cc; + int rv; + uint64_t pkt_thres = + rtb->cc_bytes_in_flight / cstat->max_tx_udp_payload_size / 2; + size_t ecn_pkt_lost = 0; + ngtcp2_tstamp start_ts; + ngtcp2_duration pto = ngtcp2_conn_compute_pto(conn, pktns); + uint64_t bytes_lost = 0; + ngtcp2_duration max_ack_delay; + + pkt_thres = ngtcp2_max(pkt_thres, NGTCP2_PKT_THRESHOLD); + pkt_thres = ngtcp2_min(pkt_thres, 256); + cstat->loss_time[rtb->pktns_id] = UINT64_MAX; + loss_delay = compute_pkt_loss_delay(cstat); + + it = ngtcp2_ksl_lower_bound(&rtb->ents, &rtb->largest_acked_tx_pkt_num); + for (; !ngtcp2_ksl_it_end(&it); ngtcp2_ksl_it_next(&it)) { + ent = ngtcp2_ksl_it_get(&it); + + if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED) { + break; + } + + if (rtb_pkt_lost(rtb, cstat, ent, loss_delay, (size_t)pkt_thres, ts)) { + /* All entries from ent are considered to be lost. */ + latest_ts = oldest_ts = ent->ts; + last_lost_pkt_num = ent->hd.pkt_num; + max_ack_delay = conn->remote.transport_params + ? conn->remote.transport_params->max_ack_delay + : 0; + + congestion_period = + (cstat->smoothed_rtt + + ngtcp2_max(4 * cstat->rttvar, NGTCP2_GRANULARITY) + max_ack_delay) * + NGTCP2_PERSISTENT_CONGESTION_THRESHOLD; + + start_ts = ngtcp2_max(rtb->persistent_congestion_start_ts, + cstat->first_rtt_sample_ts); + + for (; !ngtcp2_ksl_it_end(&it);) { + ent = ngtcp2_ksl_it_get(&it); + + if (last_lost_pkt_num == ent->hd.pkt_num + 1 && ent->ts >= start_ts) { + last_lost_pkt_num = ent->hd.pkt_num; + oldest_ts = ent->ts; + } else { + last_lost_pkt_num = -1; + } + + if ((ent->flags & NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED)) { + if (rtb->pktns_id != NGTCP2_PKTNS_ID_APPLICATION || + last_lost_pkt_num == -1 || + latest_ts - oldest_ts >= congestion_period) { + break; + } + ngtcp2_ksl_it_next(&it); + continue; + } + + if (ent->hd.pkt_num >= pktns->tx.ecn.start_pkt_num && + (ent->flags & NGTCP2_RTB_ENTRY_FLAG_ECN)) { + ++ecn_pkt_lost; + } + + bytes_lost += rtb_on_remove(rtb, ent, cstat); + rv = rtb_on_pkt_lost(rtb, &it, ent, cstat, conn, pktns, ts); + if (rv != 0) { + return rv; + } + } + + /* If only PMTUD packets are lost, do not trigger congestion + event. */ + if (bytes_lost == 0) { + break; + } + + switch (conn->tx.ecn.state) { + case NGTCP2_ECN_STATE_TESTING: + if (conn->tx.ecn.validation_start_ts == UINT64_MAX) { + break; + } + if (ts - conn->tx.ecn.validation_start_ts < 3 * pto) { + pktns->tx.ecn.validation_pkt_lost += ecn_pkt_lost; + assert(pktns->tx.ecn.validation_pkt_sent >= + pktns->tx.ecn.validation_pkt_lost); + break; + } + conn->tx.ecn.state = NGTCP2_ECN_STATE_UNKNOWN; + /* fall through */ + case NGTCP2_ECN_STATE_UNKNOWN: + pktns->tx.ecn.validation_pkt_lost += ecn_pkt_lost; + assert(pktns->tx.ecn.validation_pkt_sent >= + pktns->tx.ecn.validation_pkt_lost); + if (conn_all_ecn_pkt_lost(conn)) { + conn->tx.ecn.state = NGTCP2_ECN_STATE_FAILED; + } + break; + default: + break; + } + + cc->congestion_event(cc, cstat, latest_ts, ts); + + loss_window = latest_ts - oldest_ts; + /* Persistent congestion situation is only evaluated for app + * packet number space and for the packets sent after handshake + * is confirmed. During handshake, there is not much packets + * sent and also people seem to do lots of effort not to trigger + * persistent congestion there, then it is a lot easier to just + * not enable it during handshake. + */ + if (rtb->pktns_id == NGTCP2_PKTNS_ID_APPLICATION && loss_window > 0) { + if (loss_window >= congestion_period) { + ngtcp2_log_info(rtb->log, NGTCP2_LOG_EVENT_RCV, + "persistent congestion loss_window=%" PRIu64 + " congestion_period=%" PRIu64, + loss_window, congestion_period); + + /* Reset min_rtt, srtt, and rttvar here. Next new RTT + sample will be used to recalculate these values. */ + cstat->min_rtt = UINT64_MAX; + cstat->smoothed_rtt = conn->local.settings.initial_rtt; + cstat->rttvar = conn->local.settings.initial_rtt / 2; + cstat->first_rtt_sample_ts = UINT64_MAX; + + cc->on_persistent_congestion(cc, cstat, ts); + } + } + + break; + } + } + + ngtcp2_rtb_remove_excessive_lost_pkt(rtb, (size_t)pkt_thres); + + if (ppkt_lost) { + *ppkt_lost = bytes_lost; + } + + return 0; +} + +int ngtcp2_rtb_detect_lost_pkt(ngtcp2_rtb *rtb, ngtcp2_conn *conn, + ngtcp2_pktns *pktns, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts) { + return rtb_detect_lost_pkt(rtb, /* ppkt_lost = */ NULL, conn, pktns, cstat, + ts); +} + +void ngtcp2_rtb_remove_excessive_lost_pkt(ngtcp2_rtb *rtb, size_t n) { + ngtcp2_ksl_it it = ngtcp2_ksl_end(&rtb->ents); + ngtcp2_rtb_entry *ent; + int rv; + (void)rv; + + for (; rtb->num_lost_pkts > n;) { + assert(ngtcp2_ksl_it_end(&it)); + ngtcp2_ksl_it_prev(&it); + ent = ngtcp2_ksl_it_get(&it); + + assert(ent->flags & NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED); + + ngtcp2_log_info(rtb->log, NGTCP2_LOG_EVENT_RCV, + "removing stale lost pkn=%" PRId64, ent->hd.pkt_num); + + --rtb->num_lost_pkts; + + if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_PMTUD_PROBE) { + --rtb->num_lost_pmtud_pkts; + } + + rv = ngtcp2_ksl_remove_hint(&rtb->ents, &it, &it, &ent->hd.pkt_num); + assert(0 == rv); + ngtcp2_rtb_entry_objalloc_del(ent, rtb->rtb_entry_objalloc, + rtb->frc_objalloc, rtb->mem); + } +} + +void ngtcp2_rtb_remove_expired_lost_pkt(ngtcp2_rtb *rtb, ngtcp2_duration pto, + ngtcp2_tstamp ts) { + ngtcp2_ksl_it it; + ngtcp2_rtb_entry *ent; + int rv; + (void)rv; + + if (ngtcp2_ksl_len(&rtb->ents) == 0) { + return; + } + + it = ngtcp2_ksl_end(&rtb->ents); + + for (;;) { + assert(ngtcp2_ksl_it_end(&it)); + + ngtcp2_ksl_it_prev(&it); + ent = ngtcp2_ksl_it_get(&it); + + if (!(ent->flags & NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED) || + ts - ent->lost_ts < pto) { + return; + } + + ngtcp2_log_info(rtb->log, NGTCP2_LOG_EVENT_RCV, + "removing stale lost pkn=%" PRId64, ent->hd.pkt_num); + + --rtb->num_lost_pkts; + + if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_PMTUD_PROBE) { + --rtb->num_lost_pmtud_pkts; + } + + rv = ngtcp2_ksl_remove_hint(&rtb->ents, &it, &it, &ent->hd.pkt_num); + assert(0 == rv); + ngtcp2_rtb_entry_objalloc_del(ent, rtb->rtb_entry_objalloc, + rtb->frc_objalloc, rtb->mem); + + if (ngtcp2_ksl_len(&rtb->ents) == 0) { + return; + } + } +} + +ngtcp2_tstamp ngtcp2_rtb_lost_pkt_ts(ngtcp2_rtb *rtb) { + ngtcp2_ksl_it it; + ngtcp2_rtb_entry *ent; + + if (ngtcp2_ksl_len(&rtb->ents) == 0) { + return UINT64_MAX; + } + + it = ngtcp2_ksl_end(&rtb->ents); + ngtcp2_ksl_it_prev(&it); + ent = ngtcp2_ksl_it_get(&it); + + if (!(ent->flags & NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED)) { + return UINT64_MAX; + } + + return ent->lost_ts; +} + +static int rtb_on_pkt_lost_resched_move(ngtcp2_rtb *rtb, ngtcp2_conn *conn, + ngtcp2_pktns *pktns, + ngtcp2_rtb_entry *ent) { + ngtcp2_frame_chain **pfrc, *frc; + ngtcp2_stream *sfr; + ngtcp2_strm *strm; + int rv; + int streamfrq_empty; + + ngtcp2_log_pkt_lost(rtb->log, ent->hd.pkt_num, ent->hd.type, ent->hd.flags, + ent->ts); + + if (rtb->qlog) { + ngtcp2_qlog_pkt_lost(rtb->qlog, ent); + } + + if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_PROBE) { + ngtcp2_log_info(rtb->log, NGTCP2_LOG_EVENT_RCV, + "pkn=%" PRId64 + " is a probe packet, no retransmission is necessary", + ent->hd.pkt_num); + return 0; + } + + if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_PMTUD_PROBE) { + ngtcp2_log_info(rtb->log, NGTCP2_LOG_EVENT_RCV, + "pkn=%" PRId64 + " is a PMTUD probe packet, no retransmission is necessary", + ent->hd.pkt_num); + return 0; + } + + if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED) { + --rtb->num_lost_pkts; + + if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_PMTUD_PROBE) { + --rtb->num_lost_pmtud_pkts; + } + + ngtcp2_log_info(rtb->log, NGTCP2_LOG_EVENT_RCV, + "pkn=%" PRId64 + " was declared lost and has already been retransmitted", + ent->hd.pkt_num); + return 0; + } + + if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_PTO_RECLAIMED) { + ngtcp2_log_info(rtb->log, NGTCP2_LOG_EVENT_RCV, + "pkn=%" PRId64 " has already been reclaimed on PTO", + ent->hd.pkt_num); + return 0; + } + + if (!(ent->flags & NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE) && + (!(ent->flags & NGTCP2_RTB_ENTRY_FLAG_DATAGRAM) || + !conn->callbacks.lost_datagram)) { + /* PADDING only (or PADDING + ACK ) packets will have NULL + ent->frc. */ + return 0; + } + + pfrc = &ent->frc; + + for (; *pfrc;) { + switch ((*pfrc)->fr.type) { + case NGTCP2_FRAME_STREAM: + frc = *pfrc; + + *pfrc = frc->next; + frc->next = NULL; + sfr = &frc->fr.stream; + + strm = ngtcp2_conn_find_stream(conn, sfr->stream_id); + if (!strm) { + ngtcp2_frame_chain_objalloc_del(frc, rtb->frc_objalloc, rtb->mem); + break; + } + streamfrq_empty = ngtcp2_strm_streamfrq_empty(strm); + rv = ngtcp2_strm_streamfrq_push(strm, frc); + if (rv != 0) { + ngtcp2_frame_chain_objalloc_del(frc, rtb->frc_objalloc, rtb->mem); + return rv; + } + if (!ngtcp2_strm_is_tx_queued(strm)) { + strm->cycle = ngtcp2_conn_tx_strmq_first_cycle(conn); + rv = ngtcp2_conn_tx_strmq_push(conn, strm); + if (rv != 0) { + return rv; + } + } + if (streamfrq_empty) { + ++conn->tx.strmq_nretrans; + } + break; + case NGTCP2_FRAME_CRYPTO: + frc = *pfrc; + + *pfrc = frc->next; + frc->next = NULL; + + rv = ngtcp2_ksl_insert(&pktns->crypto.tx.frq, NULL, + &frc->fr.crypto.offset, frc); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + ngtcp2_frame_chain_objalloc_del(frc, rtb->frc_objalloc, rtb->mem); + return rv; + } + break; + case NGTCP2_FRAME_DATAGRAM: + case NGTCP2_FRAME_DATAGRAM_LEN: + frc = *pfrc; + + if (conn->callbacks.lost_datagram) { + rv = conn->callbacks.lost_datagram(conn, frc->fr.datagram.dgram_id, + conn->user_data); + if (rv != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + } + + *pfrc = (*pfrc)->next; + + ngtcp2_frame_chain_objalloc_del(frc, rtb->frc_objalloc, rtb->mem); + break; + default: + pfrc = &(*pfrc)->next; + } + } + + *pfrc = pktns->tx.frq; + pktns->tx.frq = ent->frc; + ent->frc = NULL; + + return 0; +} + +int ngtcp2_rtb_remove_all(ngtcp2_rtb *rtb, ngtcp2_conn *conn, + ngtcp2_pktns *pktns, ngtcp2_conn_stat *cstat) { + ngtcp2_rtb_entry *ent; + ngtcp2_ksl_it it; + int rv; + + it = ngtcp2_ksl_begin(&rtb->ents); + + for (; !ngtcp2_ksl_it_end(&it);) { + ent = ngtcp2_ksl_it_get(&it); + + rtb_on_remove(rtb, ent, cstat); + rv = ngtcp2_ksl_remove_hint(&rtb->ents, &it, &it, &ent->hd.pkt_num); + assert(0 == rv); + + rv = rtb_on_pkt_lost_resched_move(rtb, conn, pktns, ent); + ngtcp2_rtb_entry_objalloc_del(ent, rtb->rtb_entry_objalloc, + rtb->frc_objalloc, rtb->mem); + if (rv != 0) { + return rv; + } + } + + return 0; +} + +void ngtcp2_rtb_remove_early_data(ngtcp2_rtb *rtb, ngtcp2_conn_stat *cstat) { + ngtcp2_rtb_entry *ent; + ngtcp2_ksl_it it; + int rv; + (void)rv; + + it = ngtcp2_ksl_begin(&rtb->ents); + + for (; !ngtcp2_ksl_it_end(&it);) { + ent = ngtcp2_ksl_it_get(&it); + + if (ent->hd.type != NGTCP2_PKT_0RTT) { + ngtcp2_ksl_it_next(&it); + continue; + } + + rtb_on_remove(rtb, ent, cstat); + rv = ngtcp2_ksl_remove_hint(&rtb->ents, &it, &it, &ent->hd.pkt_num); + assert(0 == rv); + + ngtcp2_rtb_entry_objalloc_del(ent, rtb->rtb_entry_objalloc, + rtb->frc_objalloc, rtb->mem); + } +} + +int ngtcp2_rtb_empty(ngtcp2_rtb *rtb) { + return ngtcp2_ksl_len(&rtb->ents) == 0; +} + +void ngtcp2_rtb_reset_cc_state(ngtcp2_rtb *rtb, int64_t cc_pkt_num) { + rtb->cc_pkt_num = cc_pkt_num; + rtb->cc_bytes_in_flight = 0; +} + +ngtcp2_ssize ngtcp2_rtb_reclaim_on_pto(ngtcp2_rtb *rtb, ngtcp2_conn *conn, + ngtcp2_pktns *pktns, size_t num_pkts) { + ngtcp2_ksl_it it; + ngtcp2_rtb_entry *ent; + ngtcp2_ssize reclaimed; + size_t atmost = num_pkts; + + it = ngtcp2_ksl_end(&rtb->ents); + for (; !ngtcp2_ksl_it_begin(&it) && num_pkts >= 1;) { + ngtcp2_ksl_it_prev(&it); + ent = ngtcp2_ksl_it_get(&it); + + if ((ent->flags & (NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED | + NGTCP2_RTB_ENTRY_FLAG_PTO_RECLAIMED)) || + !(ent->flags & NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE)) { + continue; + } + + assert(ent->frc); + + reclaimed = + rtb_reclaim_frame(rtb, NGTCP2_RECLAIM_FLAG_NONE, conn, pktns, ent); + if (reclaimed < 0) { + return reclaimed; + } + + /* Mark reclaimed even if reclaimed == 0 so that we can skip it in + the next run. */ + ent->flags |= NGTCP2_RTB_ENTRY_FLAG_PTO_RECLAIMED; + + assert(rtb->num_retransmittable); + --rtb->num_retransmittable; + + if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING) { + ent->flags &= (uint16_t)~NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING; + assert(rtb->num_pto_eliciting); + --rtb->num_pto_eliciting; + } + + if (reclaimed) { + --num_pkts; + } + } + + return (ngtcp2_ssize)(atmost - num_pkts); +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rtb.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rtb.h new file mode 100644 index 0000000..5183aed --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rtb.h @@ -0,0 +1,467 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_RTB_H +#define NGTCP2_RTB_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +#include "ngtcp2_pkt.h" +#include "ngtcp2_ksl.h" +#include "ngtcp2_pq.h" +#include "ngtcp2_objalloc.h" + +typedef struct ngtcp2_conn ngtcp2_conn; +typedef struct ngtcp2_pktns ngtcp2_pktns; +typedef struct ngtcp2_log ngtcp2_log; +typedef struct ngtcp2_qlog ngtcp2_qlog; +typedef struct ngtcp2_strm ngtcp2_strm; +typedef struct ngtcp2_rst ngtcp2_rst; +typedef struct ngtcp2_cc ngtcp2_cc; + +/* NGTCP2_FRAME_CHAIN_BINDER_FLAG_NONE indicates that no flag is + set. */ +#define NGTCP2_FRAME_CHAIN_BINDER_FLAG_NONE 0x00u +/* NGTCP2_FRAME_CHAIN_BINDER_FLAG_ACK indicates that an information + which a frame carries has been acknowledged. */ +#define NGTCP2_FRAME_CHAIN_BINDER_FLAG_ACK 0x01u + +/* + * ngtcp2_frame_chain_binder binds 2 or more of ngtcp2_frame_chain to + * share the acknowledgement state. In general, all + * ngtcp2_frame_chains bound to the same binder must have the same + * information. + */ +typedef struct ngtcp2_frame_chain_binder { + size_t refcount; + /* flags is bitwise OR of zero or more of + NGTCP2_FRAME_CHAIN_BINDER_FLAG_*. */ + uint32_t flags; +} ngtcp2_frame_chain_binder; + +int ngtcp2_frame_chain_binder_new(ngtcp2_frame_chain_binder **pbinder, + const ngtcp2_mem *mem); + +typedef struct ngtcp2_frame_chain ngtcp2_frame_chain; + +/* + * ngtcp2_frame_chain chains frames in a single packet. + */ +struct ngtcp2_frame_chain { + union { + struct { + ngtcp2_frame_chain *next; + ngtcp2_frame_chain_binder *binder; + ngtcp2_frame fr; + }; + + ngtcp2_opl_entry oplent; + }; +}; + +ngtcp2_objalloc_def(frame_chain, ngtcp2_frame_chain, oplent); + +/* + * ngtcp2_bind_frame_chains binds two frame chains |a| and |b| using + * new or existing ngtcp2_frame_chain_binder. |a| might have non-NULL + * a->binder. |b| must not have non-NULL b->binder. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory + */ +int ngtcp2_bind_frame_chains(ngtcp2_frame_chain *a, ngtcp2_frame_chain *b, + const ngtcp2_mem *mem); + +/* NGTCP2_MAX_STREAM_DATACNT is the maximum number of ngtcp2_vec that + a ngtcp2_stream can include. */ +#define NGTCP2_MAX_STREAM_DATACNT 256 + +/* NGTCP2_MAX_CRYPTO_DATACNT is the maximum number of ngtcp2_vec that + a ngtcp2_crypto can include. */ +#define NGTCP2_MAX_CRYPTO_DATACNT 8 + +/* + * ngtcp2_frame_chain_new allocates ngtcp2_frame_chain object and + * assigns its pointer to |*pfrc|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory. + */ +int ngtcp2_frame_chain_new(ngtcp2_frame_chain **pfrc, const ngtcp2_mem *mem); + +/* + * ngtcp2_frame_chain_objalloc_new behaves like + * ngtcp2_frame_chain_new, but it uses |objalloc| to allocate the object. + */ +int ngtcp2_frame_chain_objalloc_new(ngtcp2_frame_chain **pfrc, + ngtcp2_objalloc *objalloc); + +/* + * ngtcp2_frame_chain_extralen_new works like ngtcp2_frame_chain_new, + * but it allocates extra memory |extralen| in order to extend + * ngtcp2_frame. + */ +int ngtcp2_frame_chain_extralen_new(ngtcp2_frame_chain **pfrc, size_t extralen, + const ngtcp2_mem *mem); + +/* + * ngtcp2_frame_chain_stream_datacnt_objalloc_new works like + * ngtcp2_frame_chain_new, but it allocates enough data to store + * additional |datacnt| - 1 ngtcp2_vec object after ngtcp2_stream + * object. If no additional space is required, + * ngtcp2_frame_chain_objalloc_new is called internally. + */ +int ngtcp2_frame_chain_stream_datacnt_objalloc_new(ngtcp2_frame_chain **pfrc, + size_t datacnt, + ngtcp2_objalloc *objalloc, + const ngtcp2_mem *mem); + +/* + * ngtcp2_frame_chain_crypto_datacnt_objalloc_new works like + * ngtcp2_frame_chain_new, but it allocates enough data to store + * additional |datacnt| - 1 ngtcp2_vec object after ngtcp2_crypto + * object. If no additional space is required, + * ngtcp2_frame_chain_objalloc_new is called internally. + */ +int ngtcp2_frame_chain_crypto_datacnt_objalloc_new(ngtcp2_frame_chain **pfrc, + size_t datacnt, + ngtcp2_objalloc *objalloc, + const ngtcp2_mem *mem); + +int ngtcp2_frame_chain_new_token_objalloc_new(ngtcp2_frame_chain **pfrc, + const uint8_t *token, + size_t tokenlen, + ngtcp2_objalloc *objalloc, + const ngtcp2_mem *mem); + +/* + * ngtcp2_frame_chain_del deallocates |frc|. It also deallocates the + * memory pointed by |frc|. + */ +void ngtcp2_frame_chain_del(ngtcp2_frame_chain *frc, const ngtcp2_mem *mem); + +/* + * ngtcp2_frame_chain_objalloc_del adds |frc| to |objalloc| for reuse. + * It might just delete |frc| depending on the frame type and the size + * of |frc|. + */ +void ngtcp2_frame_chain_objalloc_del(ngtcp2_frame_chain *frc, + ngtcp2_objalloc *objalloc, + const ngtcp2_mem *mem); + +/* + * ngtcp2_frame_chain_init initializes |frc|. + */ +void ngtcp2_frame_chain_init(ngtcp2_frame_chain *frc); + +/* + * ngtcp2_frame_chain_list_objalloc_del adds all ngtcp2_frame_chain + * linked from |frc| to |objalloc| for reuse. Depending on the frame type + * and its size, ngtcp2_frame_chain might be deleted instead. + */ +void ngtcp2_frame_chain_list_objalloc_del(ngtcp2_frame_chain *frc, + ngtcp2_objalloc *objalloc, + const ngtcp2_mem *mem); + +/* NGTCP2_RTB_ENTRY_FLAG_NONE indicates that no flag is set. */ +#define NGTCP2_RTB_ENTRY_FLAG_NONE 0x00u +/* NGTCP2_RTB_ENTRY_FLAG_PROBE indicates that the entry includes a + probe packet. */ +#define NGTCP2_RTB_ENTRY_FLAG_PROBE 0x01u +/* NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE indicates that the entry + includes a frame which must be retransmitted until it is + acknowledged. In most cases, this flag is used along with + NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING and + NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING. */ +#define NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE 0x02u +/* NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING indicates that the entry + elicits acknowledgement. */ +#define NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING 0x04u +/* NGTCP2_RTB_ENTRY_FLAG_PTO_RECLAIMED indicates that the packet has + been reclaimed on PTO. It is not marked lost yet and still + consumes congestion window. */ +#define NGTCP2_RTB_ENTRY_FLAG_PTO_RECLAIMED 0x08u +/* NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED indicates that the entry + has been marked lost and, optionally, scheduled to retransmit. */ +#define NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED 0x10u +/* NGTCP2_RTB_ENTRY_FLAG_ECN indicates that the entry is included in a + UDP datagram with ECN marking. */ +#define NGTCP2_RTB_ENTRY_FLAG_ECN 0x20u +/* NGTCP2_RTB_ENTRY_FLAG_DATAGRAM indicates that the entry includes + DATAGRAM frame. */ +#define NGTCP2_RTB_ENTRY_FLAG_DATAGRAM 0x40u +/* NGTCP2_RTB_ENTRY_FLAG_PMTUD_PROBE indicates that the entry includes + a PMTUD probe packet. */ +#define NGTCP2_RTB_ENTRY_FLAG_PMTUD_PROBE 0x80u +/* NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING indicates that the entry + includes a packet which elicits PTO probe packets. */ +#define NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING 0x100u + +typedef struct ngtcp2_rtb_entry ngtcp2_rtb_entry; + +/* + * ngtcp2_rtb_entry is an object stored in ngtcp2_rtb. It corresponds + * to the one packet which is waiting for its ACK. + */ +struct ngtcp2_rtb_entry { + union { + struct { + ngtcp2_rtb_entry *next; + + struct { + int64_t pkt_num; + uint8_t type; + uint8_t flags; + } hd; + ngtcp2_frame_chain *frc; + /* ts is the time point when a packet included in this entry is sent + to a peer. */ + ngtcp2_tstamp ts; + /* lost_ts is the time when this entry is marked lost. */ + ngtcp2_tstamp lost_ts; + /* pktlen is the length of QUIC packet */ + size_t pktlen; + struct { + uint64_t delivered; + ngtcp2_tstamp delivered_ts; + ngtcp2_tstamp first_sent_ts; + uint64_t tx_in_flight; + uint64_t lost; + int is_app_limited; + } rst; + /* flags is bitwise-OR of zero or more of + NGTCP2_RTB_ENTRY_FLAG_*. */ + uint16_t flags; + }; + + ngtcp2_opl_entry oplent; + }; +}; + +ngtcp2_objalloc_def(rtb_entry, ngtcp2_rtb_entry, oplent); + +/* + * ngtcp2_rtb_entry_new allocates ngtcp2_rtb_entry object, and assigns + * its pointer to |*pent|. + */ +int ngtcp2_rtb_entry_objalloc_new(ngtcp2_rtb_entry **pent, + const ngtcp2_pkt_hd *hd, + ngtcp2_frame_chain *frc, ngtcp2_tstamp ts, + size_t pktlen, uint16_t flags, + ngtcp2_objalloc *objalloc); + +/* + * ngtcp2_rtb_entry_objalloc_del adds |ent| to |objalloc| for reuse. + * ngtcp2_frame_chain linked from ent->frc are also added to + * |frc_objalloc| depending on their frame type and size. + */ +void ngtcp2_rtb_entry_objalloc_del(ngtcp2_rtb_entry *ent, + ngtcp2_objalloc *objalloc, + ngtcp2_objalloc *frc_objalloc, + const ngtcp2_mem *mem); + +/* + * ngtcp2_rtb tracks sent packets, and its ACK timeout for + * retransmission. + */ +typedef struct ngtcp2_rtb { + ngtcp2_objalloc *frc_objalloc; + ngtcp2_objalloc *rtb_entry_objalloc; + /* ents includes ngtcp2_rtb_entry sorted by decreasing order of + packet number. */ + ngtcp2_ksl ents; + /* crypto is CRYPTO stream. */ + ngtcp2_strm *crypto; + ngtcp2_rst *rst; + ngtcp2_cc *cc; + ngtcp2_log *log; + ngtcp2_qlog *qlog; + const ngtcp2_mem *mem; + /* largest_acked_tx_pkt_num is the largest packet number + acknowledged by the peer. */ + int64_t largest_acked_tx_pkt_num; + /* num_ack_eliciting is the number of ACK eliciting entries. */ + size_t num_ack_eliciting; + /* num_retransmittable is the number of packets which contain frames + that must be retransmitted on loss. */ + size_t num_retransmittable; + /* num_pto_eliciting is the number of packets that elicit PTO probe + packets. */ + size_t num_pto_eliciting; + /* probe_pkt_left is the number of probe packet to send */ + size_t probe_pkt_left; + /* pktns_id is the identifier of packet number space. */ + ngtcp2_pktns_id pktns_id; + /* cc_pkt_num is the smallest packet number that is contributed to + ngtcp2_conn_stat.bytes_in_flight. */ + int64_t cc_pkt_num; + /* cc_bytes_in_flight is the number of in-flight bytes that is + contributed to ngtcp2_conn_stat.bytes_in_flight. It only + includes the bytes after congestion state is reset. */ + uint64_t cc_bytes_in_flight; + /* persistent_congestion_start_ts is the time when persistent + congestion evaluation is started. It happens roughly after + handshake is confirmed. */ + ngtcp2_tstamp persistent_congestion_start_ts; + /* num_lost_pkts is the number entries in ents which has + NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED flag set. */ + size_t num_lost_pkts; + /* num_lost_pmtud_pkts is the number of entries in ents which have + both NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED and + NGTCP2_RTB_ENTRY_FLAG_PMTUD_PROBE flags set. */ + size_t num_lost_pmtud_pkts; +} ngtcp2_rtb; + +/* + * ngtcp2_rtb_init initializes |rtb|. + */ +void ngtcp2_rtb_init(ngtcp2_rtb *rtb, ngtcp2_pktns_id pktns_id, + ngtcp2_strm *crypto, ngtcp2_rst *rst, ngtcp2_cc *cc, + ngtcp2_log *log, ngtcp2_qlog *qlog, + ngtcp2_objalloc *rtb_entry_objalloc, + ngtcp2_objalloc *frc_objalloc, const ngtcp2_mem *mem); + +/* + * ngtcp2_rtb_free deallocates resources allocated for |rtb|. + */ +void ngtcp2_rtb_free(ngtcp2_rtb *rtb); + +/* + * ngtcp2_rtb_add adds |ent| to |rtb|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory + */ +int ngtcp2_rtb_add(ngtcp2_rtb *rtb, ngtcp2_rtb_entry *ent, + ngtcp2_conn_stat *cstat); + +/* + * ngtcp2_rtb_head returns the iterator which points to the entry + * which has the largest packet number. If there is no entry, + * returned value satisfies ngtcp2_ksl_it_end(&it) != 0. + */ +ngtcp2_ksl_it ngtcp2_rtb_head(ngtcp2_rtb *rtb); + +/* + * ngtcp2_rtb_recv_ack removes acked ngtcp2_rtb_entry from |rtb|. + * |pkt_num| is a packet number which includes |fr|. |pkt_ts| is the + * timestamp when packet is received. |ts| should be the current + * time. Usually they are the same, but for buffered packets, + * |pkt_ts| would be earlier than |ts|. + * + * This function returns the number of newly acknowledged packets if + * it succeeds, or one of the following negative error codes: + * + * NGTCP2_ERR_CALLBACK_FAILURE + * User callback failed + * NGTCP2_ERR_NOMEM + * Out of memory + */ +ngtcp2_ssize ngtcp2_rtb_recv_ack(ngtcp2_rtb *rtb, const ngtcp2_ack *fr, + ngtcp2_conn_stat *cstat, ngtcp2_conn *conn, + ngtcp2_pktns *pktns, ngtcp2_tstamp pkt_ts, + ngtcp2_tstamp ts); + +/* + * ngtcp2_rtb_detect_lost_pkt detects lost packets and prepends the + * frames contained them to |*pfrc|. Even when this function fails, + * some frames might be prepended to |*pfrc| and the caller should + * handle them. + */ +int ngtcp2_rtb_detect_lost_pkt(ngtcp2_rtb *rtb, ngtcp2_conn *conn, + ngtcp2_pktns *pktns, ngtcp2_conn_stat *cstat, + ngtcp2_tstamp ts); + +/* + * ngtcp2_rtb_remove_expired_lost_pkt removes expired lost packet. + */ +void ngtcp2_rtb_remove_expired_lost_pkt(ngtcp2_rtb *rtb, ngtcp2_duration pto, + ngtcp2_tstamp ts); + +/* + * ngtcp2_rtb_lost_pkt_ts returns the earliest time when the still + * retained packet was lost. It returns UINT64_MAX if no such packet + * exists. + */ +ngtcp2_tstamp ngtcp2_rtb_lost_pkt_ts(ngtcp2_rtb *rtb); + +/* + * ngtcp2_rtb_remove_all removes all packets from |rtb| and prepends + * all frames to |*pfrc|. Even when this function fails, some frames + * might be prepended to |*pfrc| and the caller should handle them. + */ +int ngtcp2_rtb_remove_all(ngtcp2_rtb *rtb, ngtcp2_conn *conn, + ngtcp2_pktns *pktns, ngtcp2_conn_stat *cstat); + +/* + * ngtcp2_rtb_remove_early_data removes all entries for 0RTT packets. + */ +void ngtcp2_rtb_remove_early_data(ngtcp2_rtb *rtb, ngtcp2_conn_stat *cstat); + +/* + * ngtcp2_rtb_empty returns nonzero if |rtb| have no entry. + */ +int ngtcp2_rtb_empty(ngtcp2_rtb *rtb); + +/* + * ngtcp2_rtb_reset_cc_state resets congestion state in |rtb|. + * |cc_pkt_num| is the next outbound packet number which is sent under + * new congestion state. + */ +void ngtcp2_rtb_reset_cc_state(ngtcp2_rtb *rtb, int64_t cc_pkt_num); + +/* + * ngtcp2_rtb_remove_expired_lost_pkt ensures that the number of lost + * packets at most |n|. + */ +void ngtcp2_rtb_remove_excessive_lost_pkt(ngtcp2_rtb *rtb, size_t n); + +/* + * ngtcp2_rtb_reclaim_on_pto reclaims up to |num_pkts| packets which + * are in-flight and not marked lost to send them in PTO probe. The + * reclaimed frames are chained to |*pfrc|. + * + * This function returns the number of packets reclaimed if it + * succeeds, or one of the following negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory + */ +ngtcp2_ssize ngtcp2_rtb_reclaim_on_pto(ngtcp2_rtb *rtb, ngtcp2_conn *conn, + ngtcp2_pktns *pktns, size_t num_pkts); + +#endif /* NGTCP2_RTB_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_str.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_str.c new file mode 100644 index 0000000..a61636d --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_str.c @@ -0,0 +1,233 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_str.h" + +#include <string.h> + +#include "ngtcp2_macro.h" + +void *ngtcp2_cpymem(void *dest, const void *src, size_t n) { + memcpy(dest, src, n); + return (uint8_t *)dest + n; +} + +uint8_t *ngtcp2_setmem(uint8_t *dest, uint8_t b, size_t n) { + memset(dest, b, n); + return dest + n; +} + +const void *ngtcp2_get_bytes(void *dest, const void *src, size_t n) { + memcpy(dest, src, n); + return (uint8_t *)src + n; +} + +#define LOWER_XDIGITS "0123456789abcdef" + +uint8_t *ngtcp2_encode_hex(uint8_t *dest, const uint8_t *data, size_t len) { + size_t i; + uint8_t *p = dest; + + for (i = 0; i < len; ++i) { + *p++ = (uint8_t)LOWER_XDIGITS[data[i] >> 4]; + *p++ = (uint8_t)LOWER_XDIGITS[data[i] & 0xf]; + } + + *p = '\0'; + + return dest; +} + +char *ngtcp2_encode_printable_ascii(char *dest, const uint8_t *data, + size_t len) { + size_t i; + char *p = dest; + uint8_t c; + + for (i = 0; i < len; ++i) { + c = data[i]; + if (0x20 <= c && c <= 0x7e) { + *p++ = (char)c; + } else { + *p++ = '.'; + } + } + + *p = '\0'; + + return dest; +} + +/* + * write_uint writes |n| to the buffer pointed by |p| in decimal + * representation. It returns |p| plus the number of bytes written. + * The function assumes that the buffer has enough capacity to contain + * a string. + */ +static uint8_t *write_uint(uint8_t *p, uint64_t n) { + size_t nlen = 0; + uint64_t t; + uint8_t *res; + + if (n == 0) { + *p++ = '0'; + return p; + } + for (t = n; t; t /= 10, ++nlen) + ; + p += nlen; + res = p; + for (; n; n /= 10) { + *--p = (uint8_t)((n % 10) + '0'); + } + return res; +} + +uint8_t *ngtcp2_encode_ipv4(uint8_t *dest, const uint8_t *addr) { + size_t i; + uint8_t *p = dest; + + p = write_uint(p, addr[0]); + + for (i = 1; i < 4; ++i) { + *p++ = '.'; + p = write_uint(p, addr[i]); + } + + *p = '\0'; + + return dest; +} + +/* + * write_hex_zsup writes the content of buffer pointed by |data| of + * length |len| to |dest| in hex string. Any leading zeros are + * suppressed. It returns |dest| plus the number of bytes written. + */ +static uint8_t *write_hex_zsup(uint8_t *dest, const uint8_t *data, size_t len) { + size_t i; + uint8_t *p = dest; + uint8_t d; + + for (i = 0; i < len; ++i) { + d = data[i]; + if (d >> 4) { + break; + } + + d &= 0xf; + + if (d) { + *p++ = (uint8_t)LOWER_XDIGITS[d]; + ++i; + break; + } + } + + if (p == dest && i == len) { + *p++ = '0'; + return p; + } + + for (; i < len; ++i) { + d = data[i]; + *p++ = (uint8_t)LOWER_XDIGITS[d >> 4]; + *p++ = (uint8_t)LOWER_XDIGITS[d & 0xf]; + } + + return p; +} + +uint8_t *ngtcp2_encode_ipv6(uint8_t *dest, const uint8_t *addr) { + uint16_t blks[8]; + size_t i; + size_t zlen, zoff; + size_t max_zlen = 0, max_zoff = 8; + uint8_t *p = dest; + + for (i = 0; i < 16; i += sizeof(uint16_t)) { + /* Copy in network byte order. */ + memcpy(&blks[i / sizeof(uint16_t)], addr + i, sizeof(uint16_t)); + } + + for (i = 0; i < 8;) { + if (blks[i]) { + ++i; + continue; + } + + zlen = 1; + zoff = i; + + ++i; + for (; i < 8 && blks[i] == 0; ++i, ++zlen) + ; + if (zlen > max_zlen) { + max_zlen = zlen; + max_zoff = zoff; + } + } + + /* Do not suppress a single '0' block */ + if (max_zlen == 1) { + max_zoff = 8; + } + + if (max_zoff != 0) { + p = write_hex_zsup(p, (const uint8_t *)blks, sizeof(uint16_t)); + + for (i = 1; i < max_zoff; ++i) { + *p++ = ':'; + p = write_hex_zsup(p, (const uint8_t *)(blks + i), sizeof(uint16_t)); + } + } + + if (max_zoff != 8) { + *p++ = ':'; + + if (max_zoff + max_zlen == 8) { + *p++ = ':'; + } else { + for (i = max_zoff + max_zlen; i < 8; ++i) { + *p++ = ':'; + p = write_hex_zsup(p, (const uint8_t *)(blks + i), sizeof(uint16_t)); + } + } + } + + *p = '\0'; + + return dest; +} + +int ngtcp2_cmemeq(const uint8_t *a, const uint8_t *b, size_t n) { + size_t i; + int rv = 0; + + for (i = 0; i < n; ++i) { + rv |= a[i] ^ b[i]; + } + + return rv == 0; +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_str.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_str.h new file mode 100644 index 0000000..deb75e3 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_str.h @@ -0,0 +1,94 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_STR_H +#define NGTCP2_STR_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +void *ngtcp2_cpymem(void *dest, const void *src, size_t n); + +/* + * ngtcp2_setmem writes a string of length |n| consisting only |b| to + * the buffer pointed by |dest|. It returns dest + n; + */ +uint8_t *ngtcp2_setmem(uint8_t *dest, uint8_t b, size_t n); + +/* + * ngtcp2_get_bytes copies |n| bytes from |src| to |dest|, and returns + * |src| + |n|. + */ +const void *ngtcp2_get_bytes(void *dest, const void *src, size_t n); + +/* + * ngtcp2_encode_hex encodes |data| of length |len| in hex string. It + * writes additional NULL bytes at the end of the buffer. The buffer + * pointed by |dest| must have at least |len| * 2 + 1 bytes space. + * This function returns |dest|. + */ +uint8_t *ngtcp2_encode_hex(uint8_t *dest, const uint8_t *data, size_t len); + +/* + * ngtcp2_encode_ipv4 encodes binary form IPv4 address stored in + * |addr| to human readable text form in the buffer pointed by |dest|. + * The capacity of buffer must have enough length to store a text form + * plus a terminating NULL byte. The resulting text form ends with + * NULL byte. The function returns |dest|. + */ +uint8_t *ngtcp2_encode_ipv4(uint8_t *dest, const uint8_t *addr); + +/* + * ngtcp2_encode_ipv6 encodes binary form IPv6 address stored in + * |addr| to human readable text form in the buffer pointed by |dest|. + * The capacity of buffer must have enough length to store a text form + * plus a terminating NULL byte. The resulting text form ends with + * NULL byte. The function produces the canonical form of IPv6 text + * representation described in + * https://tools.ietf.org/html/rfc5952#section-4. The function + * returns |dest|. + */ +uint8_t *ngtcp2_encode_ipv6(uint8_t *dest, const uint8_t *addr); + +/* + * ngtcp2_encode_printable_ascii encodes |data| of length |len| in + * |dest| in the following manner: printable ascii characters are + * copied as is. The other characters are converted to ".". It + * writes additional NULL bytes at the end of the buffer. |dest| must + * have at least |len| + 1 bytes. This function returns |dest|. + */ +char *ngtcp2_encode_printable_ascii(char *dest, const uint8_t *data, + size_t len); + +/* + * ngtcp2_cmemeq returns nonzero if the first |n| bytes of the buffers + * pointed by |a| and |b| are equal. The comparison is done in a + * constant time manner. + */ +int ngtcp2_cmemeq(const uint8_t *a, const uint8_t *b, size_t n); + +#endif /* NGTCP2_STR_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_strm.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_strm.c new file mode 100644 index 0000000..6f20e86 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_strm.c @@ -0,0 +1,698 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_strm.h" + +#include <string.h> +#include <assert.h> + +#include "ngtcp2_rtb.h" +#include "ngtcp2_pkt.h" +#include "ngtcp2_vec.h" + +static int offset_less(const ngtcp2_ksl_key *lhs, const ngtcp2_ksl_key *rhs) { + return *(int64_t *)lhs < *(int64_t *)rhs; +} + +void ngtcp2_strm_init(ngtcp2_strm *strm, int64_t stream_id, uint32_t flags, + uint64_t max_rx_offset, uint64_t max_tx_offset, + void *stream_user_data, ngtcp2_objalloc *frc_objalloc, + const ngtcp2_mem *mem) { + strm->frc_objalloc = frc_objalloc; + strm->cycle = 0; + strm->tx.acked_offset = NULL; + strm->tx.cont_acked_offset = 0; + strm->tx.streamfrq = NULL; + strm->tx.offset = 0; + strm->tx.max_offset = max_tx_offset; + strm->tx.last_max_stream_data_ts = UINT64_MAX; + strm->tx.loss_count = 0; + strm->tx.last_lost_pkt_num = -1; + strm->rx.rob = NULL; + strm->rx.cont_offset = 0; + strm->rx.last_offset = 0; + strm->stream_id = stream_id; + strm->flags = flags; + strm->stream_user_data = stream_user_data; + strm->rx.window = strm->rx.max_offset = strm->rx.unsent_max_offset = + max_rx_offset; + strm->pe.index = NGTCP2_PQ_BAD_INDEX; + strm->mem = mem; + strm->app_error_code = 0; +} + +void ngtcp2_strm_free(ngtcp2_strm *strm) { + ngtcp2_ksl_it it; + + if (strm == NULL) { + return; + } + + if (strm->tx.streamfrq) { + for (it = ngtcp2_ksl_begin(strm->tx.streamfrq); !ngtcp2_ksl_it_end(&it); + ngtcp2_ksl_it_next(&it)) { + ngtcp2_frame_chain_objalloc_del(ngtcp2_ksl_it_get(&it), + strm->frc_objalloc, strm->mem); + } + + ngtcp2_ksl_free(strm->tx.streamfrq); + ngtcp2_mem_free(strm->mem, strm->tx.streamfrq); + } + + if (strm->rx.rob) { + ngtcp2_rob_free(strm->rx.rob); + ngtcp2_mem_free(strm->mem, strm->rx.rob); + } + + if (strm->tx.acked_offset) { + ngtcp2_gaptr_free(strm->tx.acked_offset); + ngtcp2_mem_free(strm->mem, strm->tx.acked_offset); + } +} + +static int strm_rob_init(ngtcp2_strm *strm) { + int rv; + ngtcp2_rob *rob = ngtcp2_mem_malloc(strm->mem, sizeof(*rob)); + + if (rob == NULL) { + return NGTCP2_ERR_NOMEM; + } + + rv = ngtcp2_rob_init(rob, 8 * 1024, strm->mem); + if (rv != 0) { + ngtcp2_mem_free(strm->mem, rob); + return rv; + } + + strm->rx.rob = rob; + + return 0; +} + +uint64_t ngtcp2_strm_rx_offset(ngtcp2_strm *strm) { + if (strm->rx.rob == NULL) { + return strm->rx.cont_offset; + } + return ngtcp2_rob_first_gap_offset(strm->rx.rob); +} + +/* strm_rob_heavily_fragmented returns nonzero if the number of gaps + in |rob| exceeds the limit. */ +static int strm_rob_heavily_fragmented(ngtcp2_rob *rob) { + return ngtcp2_ksl_len(&rob->gapksl) >= 1000; +} + +int ngtcp2_strm_recv_reordering(ngtcp2_strm *strm, const uint8_t *data, + size_t datalen, uint64_t offset) { + int rv; + + if (strm->rx.rob == NULL) { + rv = strm_rob_init(strm); + if (rv != 0) { + return rv; + } + + if (strm->rx.cont_offset) { + rv = ngtcp2_rob_remove_prefix(strm->rx.rob, strm->rx.cont_offset); + if (rv != 0) { + return rv; + } + } + } + + if (strm_rob_heavily_fragmented(strm->rx.rob)) { + return NGTCP2_ERR_INTERNAL; + } + + return ngtcp2_rob_push(strm->rx.rob, offset, data, datalen); +} + +int ngtcp2_strm_update_rx_offset(ngtcp2_strm *strm, uint64_t offset) { + if (strm->rx.rob == NULL) { + strm->rx.cont_offset = offset; + return 0; + } + + return ngtcp2_rob_remove_prefix(strm->rx.rob, offset); +} + +void ngtcp2_strm_shutdown(ngtcp2_strm *strm, uint32_t flags) { + strm->flags |= flags & NGTCP2_STRM_FLAG_SHUT_RDWR; +} + +static int strm_streamfrq_init(ngtcp2_strm *strm) { + ngtcp2_ksl *streamfrq = ngtcp2_mem_malloc(strm->mem, sizeof(*streamfrq)); + if (streamfrq == NULL) { + return NGTCP2_ERR_NOMEM; + } + + ngtcp2_ksl_init(streamfrq, offset_less, sizeof(uint64_t), strm->mem); + + strm->tx.streamfrq = streamfrq; + + return 0; +} + +int ngtcp2_strm_streamfrq_push(ngtcp2_strm *strm, ngtcp2_frame_chain *frc) { + int rv; + + assert(frc->fr.type == NGTCP2_FRAME_STREAM); + assert(frc->next == NULL); + + if (strm->tx.streamfrq == NULL) { + rv = strm_streamfrq_init(strm); + if (rv != 0) { + return rv; + } + } + + return ngtcp2_ksl_insert(strm->tx.streamfrq, NULL, &frc->fr.stream.offset, + frc); +} + +static int strm_streamfrq_unacked_pop(ngtcp2_strm *strm, + ngtcp2_frame_chain **pfrc) { + ngtcp2_frame_chain *frc, *nfrc; + ngtcp2_stream *fr, *nfr; + uint64_t offset, end_offset; + size_t idx, end_idx; + uint64_t base_offset, end_base_offset; + ngtcp2_range gap; + ngtcp2_vec *v; + int rv; + ngtcp2_ksl_it it; + + *pfrc = NULL; + + assert(strm->tx.streamfrq); + assert(ngtcp2_ksl_len(strm->tx.streamfrq)); + + for (it = ngtcp2_ksl_begin(strm->tx.streamfrq); !ngtcp2_ksl_it_end(&it);) { + frc = ngtcp2_ksl_it_get(&it); + fr = &frc->fr.stream; + + ngtcp2_ksl_remove_hint(strm->tx.streamfrq, &it, &it, &fr->offset); + + idx = 0; + offset = fr->offset; + base_offset = 0; + + gap = ngtcp2_strm_get_unacked_range_after(strm, offset); + if (gap.begin < offset) { + gap.begin = offset; + } + + for (; idx < fr->datacnt && offset < gap.begin; ++idx) { + v = &fr->data[idx]; + if (offset + v->len > gap.begin) { + base_offset = gap.begin - offset; + break; + } + + offset += v->len; + } + + if (idx == fr->datacnt) { + if (fr->fin) { + if (strm->flags & NGTCP2_STRM_FLAG_FIN_ACKED) { + ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem); + assert(ngtcp2_ksl_len(strm->tx.streamfrq) == 0); + return 0; + } + + fr->offset += ngtcp2_vec_len(fr->data, fr->datacnt); + fr->datacnt = 0; + + *pfrc = frc; + + return 0; + } + + if (fr->offset == 0 && fr->datacnt == 0 && strm->tx.offset == 0 && + !(strm->flags & NGTCP2_STRM_FLAG_ANY_ACKED)) { + *pfrc = frc; + + return 0; + } + + ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem); + continue; + } + + assert(gap.begin == offset + base_offset); + + end_idx = idx; + end_offset = offset; + end_base_offset = 0; + + for (; end_idx < fr->datacnt; ++end_idx) { + v = &fr->data[end_idx]; + if (end_offset + v->len > gap.end) { + end_base_offset = gap.end - end_offset; + break; + } + + end_offset += v->len; + } + + if (fr->offset == offset && base_offset == 0 && fr->datacnt == end_idx) { + *pfrc = frc; + return 0; + } + + if (fr->datacnt == end_idx) { + memmove(fr->data, fr->data + idx, sizeof(fr->data[0]) * (end_idx - idx)); + + assert(fr->data[0].len > base_offset); + + fr->offset = offset + base_offset; + fr->datacnt = end_idx - idx; + fr->data[0].base += base_offset; + fr->data[0].len -= (size_t)base_offset; + + *pfrc = frc; + return 0; + } + + rv = ngtcp2_frame_chain_stream_datacnt_objalloc_new( + &nfrc, fr->datacnt - end_idx, strm->frc_objalloc, strm->mem); + if (rv != 0) { + ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem); + return rv; + } + + nfr = &nfrc->fr.stream; + memcpy(nfr->data, fr->data + end_idx, + sizeof(nfr->data[0]) * (fr->datacnt - end_idx)); + + assert(nfr->data[0].len > end_base_offset); + + nfr->type = NGTCP2_FRAME_STREAM; + nfr->flags = 0; + nfr->fin = fr->fin; + nfr->stream_id = fr->stream_id; + nfr->offset = end_offset + end_base_offset; + nfr->datacnt = fr->datacnt - end_idx; + nfr->data[0].base += end_base_offset; + nfr->data[0].len -= (size_t)end_base_offset; + + rv = ngtcp2_ksl_insert(strm->tx.streamfrq, NULL, &nfr->offset, nfrc); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + ngtcp2_frame_chain_objalloc_del(nfrc, strm->frc_objalloc, strm->mem); + ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem); + return rv; + } + + if (end_base_offset) { + ++end_idx; + } + + memmove(fr->data, fr->data + idx, sizeof(fr->data[0]) * (end_idx - idx)); + + assert(fr->data[0].len > base_offset); + + fr->fin = 0; + fr->offset = offset + base_offset; + fr->datacnt = end_idx - idx; + if (end_base_offset) { + assert(fr->data[fr->datacnt - 1].len > end_base_offset); + fr->data[fr->datacnt - 1].len = (size_t)end_base_offset; + } + fr->data[0].base += base_offset; + fr->data[0].len -= (size_t)base_offset; + + *pfrc = frc; + return 0; + } + + return 0; +} + +int ngtcp2_strm_streamfrq_pop(ngtcp2_strm *strm, ngtcp2_frame_chain **pfrc, + size_t left) { + ngtcp2_stream *fr, *nfr; + ngtcp2_frame_chain *frc, *nfrc; + int rv; + size_t nmerged; + uint64_t datalen; + ngtcp2_vec a[NGTCP2_MAX_STREAM_DATACNT]; + ngtcp2_vec b[NGTCP2_MAX_STREAM_DATACNT]; + size_t acnt, bcnt; + uint64_t unacked_offset; + + if (strm->tx.streamfrq == NULL || ngtcp2_ksl_len(strm->tx.streamfrq) == 0) { + *pfrc = NULL; + return 0; + } + + rv = strm_streamfrq_unacked_pop(strm, &frc); + if (rv != 0) { + return rv; + } + if (frc == NULL) { + *pfrc = NULL; + return 0; + } + + fr = &frc->fr.stream; + datalen = ngtcp2_vec_len(fr->data, fr->datacnt); + + if (left == 0) { + /* datalen could be zero if 0 length STREAM has been sent */ + if (datalen || ngtcp2_ksl_len(strm->tx.streamfrq) > 1) { + rv = ngtcp2_ksl_insert(strm->tx.streamfrq, NULL, &fr->offset, frc); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem); + return rv; + } + *pfrc = NULL; + return 0; + } + } + + if (datalen > left) { + ngtcp2_vec_copy(a, fr->data, fr->datacnt); + acnt = fr->datacnt; + + bcnt = 0; + ngtcp2_vec_split(a, &acnt, b, &bcnt, left, NGTCP2_MAX_STREAM_DATACNT); + + assert(acnt > 0); + assert(bcnt > 0); + + rv = ngtcp2_frame_chain_stream_datacnt_objalloc_new( + &nfrc, bcnt, strm->frc_objalloc, strm->mem); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem); + return rv; + } + + nfr = &nfrc->fr.stream; + nfr->type = NGTCP2_FRAME_STREAM; + nfr->flags = 0; + nfr->fin = fr->fin; + nfr->stream_id = fr->stream_id; + nfr->offset = fr->offset + left; + nfr->datacnt = bcnt; + ngtcp2_vec_copy(nfr->data, b, bcnt); + + rv = ngtcp2_ksl_insert(strm->tx.streamfrq, NULL, &nfr->offset, nfrc); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + ngtcp2_frame_chain_objalloc_del(nfrc, strm->frc_objalloc, strm->mem); + ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem); + return rv; + } + + rv = ngtcp2_frame_chain_stream_datacnt_objalloc_new( + &nfrc, acnt, strm->frc_objalloc, strm->mem); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem); + return rv; + } + + nfr = &nfrc->fr.stream; + *nfr = *fr; + nfr->fin = 0; + nfr->datacnt = acnt; + ngtcp2_vec_copy(nfr->data, a, acnt); + + ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem); + + *pfrc = nfrc; + + return 0; + } + + left -= (size_t)datalen; + + ngtcp2_vec_copy(a, fr->data, fr->datacnt); + acnt = fr->datacnt; + + for (; left && ngtcp2_ksl_len(strm->tx.streamfrq);) { + unacked_offset = ngtcp2_strm_streamfrq_unacked_offset(strm); + if (unacked_offset != fr->offset + datalen) { + assert(fr->offset + datalen < unacked_offset); + break; + } + + rv = strm_streamfrq_unacked_pop(strm, &nfrc); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem); + return rv; + } + if (nfrc == NULL) { + break; + } + + nfr = &nfrc->fr.stream; + + if (nfr->fin && nfr->datacnt == 0) { + fr->fin = 1; + ngtcp2_frame_chain_objalloc_del(nfrc, strm->frc_objalloc, strm->mem); + break; + } + + nmerged = ngtcp2_vec_merge(a, &acnt, nfr->data, &nfr->datacnt, left, + NGTCP2_MAX_STREAM_DATACNT); + if (nmerged == 0) { + rv = ngtcp2_ksl_insert(strm->tx.streamfrq, NULL, &nfr->offset, nfrc); + if (rv != 0) { + assert(ngtcp2_err_is_fatal(rv)); + ngtcp2_frame_chain_objalloc_del(nfrc, strm->frc_objalloc, strm->mem); + ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem); + return rv; + } + break; + } + + datalen += nmerged; + left -= nmerged; + + if (nfr->datacnt == 0) { + fr->fin = nfr->fin; + ngtcp2_frame_chain_objalloc_del(nfrc, strm->frc_objalloc, strm->mem); + continue; + } + + nfr->offset += nmerged; + + rv = ngtcp2_ksl_insert(strm->tx.streamfrq, NULL, &nfr->offset, nfrc); + if (rv != 0) { + ngtcp2_frame_chain_objalloc_del(nfrc, strm->frc_objalloc, strm->mem); + ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem); + return rv; + } + + break; + } + + if (acnt == fr->datacnt) { + if (acnt > 0) { + fr->data[acnt - 1] = a[acnt - 1]; + } + + *pfrc = frc; + return 0; + } + + assert(acnt > fr->datacnt); + + rv = ngtcp2_frame_chain_stream_datacnt_objalloc_new( + &nfrc, acnt, strm->frc_objalloc, strm->mem); + if (rv != 0) { + ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem); + return rv; + } + + nfr = &nfrc->fr.stream; + *nfr = *fr; + nfr->datacnt = acnt; + ngtcp2_vec_copy(nfr->data, a, acnt); + + ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem); + + *pfrc = nfrc; + + return 0; +} + +uint64_t ngtcp2_strm_streamfrq_unacked_offset(ngtcp2_strm *strm) { + ngtcp2_frame_chain *frc; + ngtcp2_stream *fr; + ngtcp2_range gap; + ngtcp2_ksl_it it; + uint64_t datalen; + + assert(strm->tx.streamfrq); + assert(ngtcp2_ksl_len(strm->tx.streamfrq)); + + for (it = ngtcp2_ksl_begin(strm->tx.streamfrq); !ngtcp2_ksl_it_end(&it); + ngtcp2_ksl_it_next(&it)) { + frc = ngtcp2_ksl_it_get(&it); + fr = &frc->fr.stream; + + gap = ngtcp2_strm_get_unacked_range_after(strm, fr->offset); + + datalen = ngtcp2_vec_len(fr->data, fr->datacnt); + + if (gap.begin <= fr->offset) { + return fr->offset; + } + if (gap.begin < fr->offset + datalen) { + return gap.begin; + } + if (fr->offset + datalen == gap.begin && fr->fin && + !(strm->flags & NGTCP2_STRM_FLAG_FIN_ACKED)) { + return fr->offset + datalen; + } + } + + return (uint64_t)-1; +} + +ngtcp2_frame_chain *ngtcp2_strm_streamfrq_top(ngtcp2_strm *strm) { + ngtcp2_ksl_it it; + + assert(strm->tx.streamfrq); + assert(ngtcp2_ksl_len(strm->tx.streamfrq)); + + it = ngtcp2_ksl_begin(strm->tx.streamfrq); + return ngtcp2_ksl_it_get(&it); +} + +int ngtcp2_strm_streamfrq_empty(ngtcp2_strm *strm) { + return strm->tx.streamfrq == NULL || ngtcp2_ksl_len(strm->tx.streamfrq) == 0; +} + +void ngtcp2_strm_streamfrq_clear(ngtcp2_strm *strm) { + ngtcp2_frame_chain *frc; + ngtcp2_ksl_it it; + + if (strm->tx.streamfrq == NULL) { + return; + } + + for (it = ngtcp2_ksl_begin(strm->tx.streamfrq); !ngtcp2_ksl_it_end(&it); + ngtcp2_ksl_it_next(&it)) { + frc = ngtcp2_ksl_it_get(&it); + ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem); + } + ngtcp2_ksl_clear(strm->tx.streamfrq); +} + +int ngtcp2_strm_is_tx_queued(ngtcp2_strm *strm) { + return strm->pe.index != NGTCP2_PQ_BAD_INDEX; +} + +int ngtcp2_strm_is_all_tx_data_acked(ngtcp2_strm *strm) { + if (strm->tx.acked_offset == NULL) { + return strm->tx.cont_acked_offset == strm->tx.offset; + } + + return ngtcp2_gaptr_first_gap_offset(strm->tx.acked_offset) == + strm->tx.offset; +} + +int ngtcp2_strm_is_all_tx_data_fin_acked(ngtcp2_strm *strm) { + return (strm->flags & NGTCP2_STRM_FLAG_FIN_ACKED) && + ngtcp2_strm_is_all_tx_data_acked(strm); +} + +ngtcp2_range ngtcp2_strm_get_unacked_range_after(ngtcp2_strm *strm, + uint64_t offset) { + ngtcp2_range gap; + + if (strm->tx.acked_offset == NULL) { + gap.begin = strm->tx.cont_acked_offset; + gap.end = UINT64_MAX; + return gap; + } + + return ngtcp2_gaptr_get_first_gap_after(strm->tx.acked_offset, offset); +} + +uint64_t ngtcp2_strm_get_acked_offset(ngtcp2_strm *strm) { + if (strm->tx.acked_offset == NULL) { + return strm->tx.cont_acked_offset; + } + + return ngtcp2_gaptr_first_gap_offset(strm->tx.acked_offset); +} + +static int strm_acked_offset_init(ngtcp2_strm *strm) { + ngtcp2_gaptr *acked_offset = + ngtcp2_mem_malloc(strm->mem, sizeof(*acked_offset)); + + if (acked_offset == NULL) { + return NGTCP2_ERR_NOMEM; + } + + ngtcp2_gaptr_init(acked_offset, strm->mem); + + strm->tx.acked_offset = acked_offset; + + return 0; +} + +int ngtcp2_strm_ack_data(ngtcp2_strm *strm, uint64_t offset, uint64_t len) { + int rv; + + if (strm->tx.acked_offset == NULL) { + if (strm->tx.cont_acked_offset == offset) { + strm->tx.cont_acked_offset += len; + return 0; + } + + rv = strm_acked_offset_init(strm); + if (rv != 0) { + return rv; + } + + rv = + ngtcp2_gaptr_push(strm->tx.acked_offset, 0, strm->tx.cont_acked_offset); + if (rv != 0) { + return rv; + } + } + + return ngtcp2_gaptr_push(strm->tx.acked_offset, offset, len); +} + +void ngtcp2_strm_set_app_error_code(ngtcp2_strm *strm, + uint64_t app_error_code) { + if (strm->flags & NGTCP2_STRM_FLAG_APP_ERROR_CODE_SET) { + return; + } + + assert(0 == strm->app_error_code); + + strm->flags |= NGTCP2_STRM_FLAG_APP_ERROR_CODE_SET; + strm->app_error_code = app_error_code; +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_strm.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_strm.h new file mode 100644 index 0000000..8e3cfe8 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_strm.h @@ -0,0 +1,310 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_STRM_H +#define NGTCP2_STRM_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +#include "ngtcp2_rob.h" +#include "ngtcp2_map.h" +#include "ngtcp2_gaptr.h" +#include "ngtcp2_ksl.h" +#include "ngtcp2_pq.h" + +typedef struct ngtcp2_frame_chain ngtcp2_frame_chain; + +/* NGTCP2_STRM_FLAG_NONE indicates that no flag is set. */ +#define NGTCP2_STRM_FLAG_NONE 0x00u +/* NGTCP2_STRM_FLAG_SHUT_RD indicates that further reception of stream + data is not allowed. */ +#define NGTCP2_STRM_FLAG_SHUT_RD 0x01u +/* NGTCP2_STRM_FLAG_SHUT_WR indicates that further transmission of + stream data is not allowed. */ +#define NGTCP2_STRM_FLAG_SHUT_WR 0x02u +#define NGTCP2_STRM_FLAG_SHUT_RDWR \ + (NGTCP2_STRM_FLAG_SHUT_RD | NGTCP2_STRM_FLAG_SHUT_WR) +/* NGTCP2_STRM_FLAG_SENT_RST indicates that RST_STREAM is sent from + the local endpoint. In this case, NGTCP2_STRM_FLAG_SHUT_WR is also + set. */ +#define NGTCP2_STRM_FLAG_SENT_RST 0x04u +/* NGTCP2_STRM_FLAG_SENT_RST indicates that RST_STREAM is received + from the remote endpoint. In this case, NGTCP2_STRM_FLAG_SHUT_RD + is also set. */ +#define NGTCP2_STRM_FLAG_RECV_RST 0x08u +/* NGTCP2_STRM_FLAG_STOP_SENDING indicates that STOP_SENDING is sent + from the local endpoint. */ +#define NGTCP2_STRM_FLAG_STOP_SENDING 0x10u +/* NGTCP2_STRM_FLAG_RST_ACKED indicates that the outgoing RST_STREAM + is acknowledged by peer. */ +#define NGTCP2_STRM_FLAG_RST_ACKED 0x20u +/* NGTCP2_STRM_FLAG_FIN_ACKED indicates that a STREAM with FIN bit set + is acknowledged by a remote endpoint. */ +#define NGTCP2_STRM_FLAG_FIN_ACKED 0x40u +/* NGTCP2_STRM_FLAG_ANY_ACKED indicates that any portion of stream + data, including 0 length segment, is acknowledged. */ +#define NGTCP2_STRM_FLAG_ANY_ACKED 0x80u +/* NGTCP2_STRM_FLAG_APP_ERROR_CODE_SET indicates that app_error_code + field is set. This resolves the ambiguity that the initial + app_error_code value 0 might be a proper application error code. + In this case, without this flag, we are unable to distinguish + assigned value from unassigned one. */ +#define NGTCP2_STRM_FLAG_APP_ERROR_CODE_SET 0x100u +/* NGTCP2_STRM_FLAG_STREAM_STOP_SENDING_CALLED is set when + stream_stop_sending callback is called. */ +#define NGTCP2_STRM_FLAG_STREAM_STOP_SENDING_CALLED 0x200u + +typedef struct ngtcp2_strm ngtcp2_strm; + +struct ngtcp2_strm { + union { + struct { + ngtcp2_pq_entry pe; + uint64_t cycle; + ngtcp2_objalloc *frc_objalloc; + + struct { + /* acked_offset tracks acknowledged outgoing data. */ + ngtcp2_gaptr *acked_offset; + /* cont_acked_offset is the offset that all data up to this offset + is acknowledged by a remote endpoint. It is used until the + remote endpoint acknowledges data in out-of-order. After that, + acked_offset is used instead. */ + uint64_t cont_acked_offset; + /* streamfrq contains STREAM frame for retransmission. The flow + control credits have been paid when they are transmitted first + time. There are no restriction regarding flow control for + retransmission. */ + ngtcp2_ksl *streamfrq; + /* offset is the next offset of outgoing data. In other words, it + is the number of bytes sent in this stream without + duplication. */ + uint64_t offset; + /* max_tx_offset is the maximum offset that local endpoint can + send for this stream. */ + uint64_t max_offset; + /* last_max_stream_data_ts is the timestamp when last + MAX_STREAM_DATA frame is sent. */ + ngtcp2_tstamp last_max_stream_data_ts; + /* loss_count is the number of packets that contain STREAM + frame for this stream and are declared to be lost. It may + include the spurious losses. It does not include a packet + whose contents have been reclaimed for PTO and which is + later declared to be lost. Those data are not blocked by + the flow control and will be sent immediately if no other + restrictions are applied. */ + size_t loss_count; + /* last_lost_pkt_num is the packet number of the packet that + is counted to loss_count. It is used to avoid to count + multiple STREAM frames in one lost packet. */ + int64_t last_lost_pkt_num; + } tx; + + struct { + /* rob is the reorder buffer for incoming stream data. The data + received in out of order is buffered and sorted by its offset + in this object. */ + ngtcp2_rob *rob; + /* cont_offset is the largest offset of consecutive data. It is + used until the endpoint receives out-of-order data. After + that, rob is used to track the offset and data. */ + uint64_t cont_offset; + /* last_offset is the largest offset of stream data received for + this stream. */ + uint64_t last_offset; + /* max_offset is the maximum offset that remote endpoint can send + to this stream. */ + uint64_t max_offset; + /* unsent_max_offset is the maximum offset that remote endpoint + can send to this stream, and it is not notified to the remote + endpoint. unsent_max_offset >= max_offset must be hold. */ + uint64_t unsent_max_offset; + /* window is the stream-level flow control window size. */ + uint64_t window; + } rx; + + const ngtcp2_mem *mem; + int64_t stream_id; + void *stream_user_data; + /* flags is bit-wise OR of zero or more of NGTCP2_STRM_FLAG_*. */ + uint32_t flags; + /* app_error_code is an error code the local endpoint sent in + RESET_STREAM or STOP_SENDING, or received from a remote endpoint + in RESET_STREAM or STOP_SENDING. First application error code is + chosen and when set, NGTCP2_STRM_FLAG_APP_ERROR_CODE_SET flag is + set in flags field. */ + uint64_t app_error_code; + }; + + ngtcp2_opl_entry oplent; + }; +}; + +/* + * ngtcp2_strm_init initializes |strm|. + */ +void ngtcp2_strm_init(ngtcp2_strm *strm, int64_t stream_id, uint32_t flags, + uint64_t max_rx_offset, uint64_t max_tx_offset, + void *stream_user_data, ngtcp2_objalloc *frc_objalloc, + const ngtcp2_mem *mem); + +/* + * ngtcp2_strm_free deallocates memory allocated for |strm|. This + * function does not free the memory pointed by |strm| itself. + */ +void ngtcp2_strm_free(ngtcp2_strm *strm); + +/* + * ngtcp2_strm_rx_offset returns the minimum offset of stream data + * which is not received yet. + */ +uint64_t ngtcp2_strm_rx_offset(ngtcp2_strm *strm); + +/* + * ngtcp2_strm_recv_reordering handles reordered data. + * + * It returns 0 if it succeeds, or one of the following negative error + * codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory + */ +int ngtcp2_strm_recv_reordering(ngtcp2_strm *strm, const uint8_t *data, + size_t datalen, uint64_t offset); + +/* + * ngtcp2_strm_update_rx_offset tells that data up to offset bytes are + * received in order. + * + * NGTCP2_ERR_NOMEM + * Out of memory + */ +int ngtcp2_strm_update_rx_offset(ngtcp2_strm *strm, uint64_t offset); + +/* + * ngtcp2_strm_shutdown shutdowns |strm|. |flags| should be + * NGTCP2_STRM_FLAG_SHUT_RD, and/or NGTCP2_STRM_FLAG_SHUT_WR. + */ +void ngtcp2_strm_shutdown(ngtcp2_strm *strm, uint32_t flags); + +/* + * ngtcp2_strm_streamfrq_push pushes |frc| to streamfrq for + * retransmission. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory + */ +int ngtcp2_strm_streamfrq_push(ngtcp2_strm *strm, ngtcp2_frame_chain *frc); + +/* + * ngtcp2_strm_streamfrq_pop pops the first ngtcp2_frame_chain and + * assigns it to |*pfrc|. This function splits into or merges several + * ngtcp2_frame_chain objects so that the returned ngtcp2_frame_chain + * has at most |left| data length. If there is no frames to send, + * this function returns 0 and |*pfrc| is NULL. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * NGTCP2_ERR_NOMEM + * Out of memory + */ +int ngtcp2_strm_streamfrq_pop(ngtcp2_strm *strm, ngtcp2_frame_chain **pfrc, + size_t left); + +/* + * ngtcp2_strm_streamfrq_unacked_offset returns the smallest offset of + * unacknowledged stream data held in strm->tx.streamfrq. + */ +uint64_t ngtcp2_strm_streamfrq_unacked_offset(ngtcp2_strm *strm); + +/* + * ngtcp2_strm_streamfrq_top returns the first ngtcp2_frame_chain. + * The queue must not be empty. + */ +ngtcp2_frame_chain *ngtcp2_strm_streamfrq_top(ngtcp2_strm *strm); + +/* + * ngtcp2_strm_streamfrq_empty returns nonzero if streamfrq is empty. + */ +int ngtcp2_strm_streamfrq_empty(ngtcp2_strm *strm); + +/* + * ngtcp2_strm_streamfrq_clear removes all frames from streamfrq. + */ +void ngtcp2_strm_streamfrq_clear(ngtcp2_strm *strm); + +/* + * ngtcp2_strm_is_tx_queued returns nonzero if |strm| is queued. + */ +int ngtcp2_strm_is_tx_queued(ngtcp2_strm *strm); + +/* + * ngtcp2_strm_is_all_tx_data_acked returns nonzero if all outgoing + * data for |strm| which have sent so far have been acknowledged. + */ +int ngtcp2_strm_is_all_tx_data_acked(ngtcp2_strm *strm); + +/* + * ngtcp2_strm_is_all_tx_data_fin_acked behaves like + * ngtcp2_strm_is_all_tx_data_acked, but it also requires that STREAM + * frame with fin bit set is acknowledged. + */ +int ngtcp2_strm_is_all_tx_data_fin_acked(ngtcp2_strm *strm); + +/* + * ngtcp2_strm_get_unacked_range_after returns the range that is not + * acknowledged yet and intersects or comes after |offset|. + */ +ngtcp2_range ngtcp2_strm_get_unacked_range_after(ngtcp2_strm *strm, + uint64_t offset); + +/* + * ngtcp2_strm_get_acked_offset returns offset, that is the data up to + * this offset have been acknowledged by a remote endpoint. It + * returns 0 if no data is acknowledged. + */ +uint64_t ngtcp2_strm_get_acked_offset(ngtcp2_strm *strm); + +/* + * ngtcp2_strm_ack_data tells |strm| that the data [offset, + * offset+len) is acknowledged by a remote endpoint. + */ +int ngtcp2_strm_ack_data(ngtcp2_strm *strm, uint64_t offset, uint64_t len); + +/* + * ngtcp2_strm_set_app_error_code sets |app_error_code| to |strm| and + * set NGTCP2_STRM_FLAG_APP_ERROR_CODE_SET flag. If the flag is + * already set, this function does nothing. + */ +void ngtcp2_strm_set_app_error_code(ngtcp2_strm *strm, uint64_t app_error_code); + +#endif /* NGTCP2_STRM_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_unreachable.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_unreachable.c new file mode 100644 index 0000000..7c7d9ae --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_unreachable.c @@ -0,0 +1,71 @@ +/* + * ngtcp2 + * + * Copyright (c) 2022 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_unreachable.h" + +#include <stdio.h> +#include <errno.h> +#ifdef HAVE_UNISTD_H +# include <unistd.h> +#endif /* HAVE_UNISTD_H */ +#include <stdlib.h> +#ifdef WIN32 +# include <io.h> +#endif /* WIN32 */ + +void ngtcp2_unreachable_fail(const char *file, int line, const char *func) { + char *buf; + size_t buflen; + int rv; + +#define NGTCP2_UNREACHABLE_TEMPLATE "%s:%d %s: Unreachable.\n" + + rv = snprintf(NULL, 0, NGTCP2_UNREACHABLE_TEMPLATE, file, line, func); + if (rv < 0) { + abort(); + } + + /* here we explicitly use system malloc */ + buflen = (size_t)rv + 1; + buf = malloc(buflen); + if (buf == NULL) { + abort(); + } + + rv = snprintf(buf, buflen, NGTCP2_UNREACHABLE_TEMPLATE, file, line, func); + if (rv < 0) { + abort(); + } + +#ifndef WIN32 + while (write(STDERR_FILENO, buf, (size_t)rv) == -1 && errno == EINTR) + ; +#else /* WIN32 */ + _write(_fileno(stderr), buf, (unsigned int)rv); +#endif /* WIN32 */ + + free(buf); + + abort(); +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_unreachable.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_unreachable.h new file mode 100644 index 0000000..11a6aaa --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_unreachable.h @@ -0,0 +1,46 @@ +/* + * ngtcp2 + * + * Copyright (c) 2022 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_UNREACHABLE_H +#define NGTCP2_UNREACHABLE_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +#define ngtcp2_unreachable() \ + ngtcp2_unreachable_fail(__FILE__, __LINE__, __func__) + +#ifdef _MSC_VER +__declspec(noreturn) +#endif /* _MSC_VER */ + void ngtcp2_unreachable_fail(const char *file, int line, const char *func) +#ifndef _MSC_VER + __attribute__((noreturn)) +#endif /* !_MSC_VER */ + ; + +#endif /* NGTCP2_UNREACHABLE_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_vec.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_vec.c new file mode 100644 index 0000000..257332e --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_vec.c @@ -0,0 +1,243 @@ +/* + * ngtcp2 + * + * Copyright (c) 2018 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ngtcp2_vec.h" + +#include <string.h> +#include <assert.h> + +#include "ngtcp2_str.h" + +ngtcp2_vec *ngtcp2_vec_init(ngtcp2_vec *vec, const uint8_t *base, size_t len) { + vec->base = (uint8_t *)base; + vec->len = len; + return vec; +} + +int ngtcp2_vec_new(ngtcp2_vec **pvec, const uint8_t *data, size_t datalen, + const ngtcp2_mem *mem) { + size_t len; + uint8_t *p; + + len = sizeof(ngtcp2_vec) + datalen; + + *pvec = ngtcp2_mem_malloc(mem, len); + if (*pvec == NULL) { + return NGTCP2_ERR_NOMEM; + } + + p = (uint8_t *)(*pvec) + sizeof(ngtcp2_vec); + (*pvec)->base = p; + (*pvec)->len = datalen; + if (datalen) { + /* p = */ ngtcp2_cpymem(p, data, datalen); + } + + return 0; +} + +void ngtcp2_vec_del(ngtcp2_vec *vec, const ngtcp2_mem *mem) { + ngtcp2_mem_free(mem, vec); +} + +uint64_t ngtcp2_vec_len(const ngtcp2_vec *vec, size_t n) { + size_t i; + size_t res = 0; + + for (i = 0; i < n; ++i) { + res += vec[i].len; + } + + return res; +} + +int64_t ngtcp2_vec_len_varint(const ngtcp2_vec *vec, size_t n) { + uint64_t res = 0; + size_t len; + size_t i; + + for (i = 0; i < n; ++i) { + len = vec[i].len; + if (len > NGTCP2_MAX_VARINT - res) { + return -1; + } + + res += len; + } + + return (int64_t)res; +} + +ngtcp2_ssize ngtcp2_vec_split(ngtcp2_vec *src, size_t *psrccnt, ngtcp2_vec *dst, + size_t *pdstcnt, size_t left, size_t maxcnt) { + size_t i; + size_t srccnt = *psrccnt; + size_t nmove; + size_t extra = 0; + + for (i = 0; i < srccnt; ++i) { + if (left >= src[i].len) { + left -= src[i].len; + continue; + } + + if (*pdstcnt && src[srccnt - 1].base + src[srccnt - 1].len == dst[0].base) { + if (*pdstcnt + srccnt - i - 1 > maxcnt) { + return -1; + } + + dst[0].len += src[srccnt - 1].len; + dst[0].base = src[srccnt - 1].base; + extra = src[srccnt - 1].len; + --srccnt; + } else if (*pdstcnt + srccnt - i > maxcnt) { + return -1; + } + + if (left == 0) { + *psrccnt = i; + } else { + *psrccnt = i + 1; + } + + nmove = srccnt - i; + if (nmove) { + memmove(dst + nmove, dst, sizeof(ngtcp2_vec) * (*pdstcnt)); + *pdstcnt += nmove; + memcpy(dst, src + i, sizeof(ngtcp2_vec) * nmove); + } + + dst[0].len -= left; + dst[0].base += left; + src[i].len = left; + + if (nmove == 0) { + extra -= left; + } + + return (ngtcp2_ssize)(ngtcp2_vec_len(dst, nmove) + extra); + } + + return 0; +} + +size_t ngtcp2_vec_merge(ngtcp2_vec *dst, size_t *pdstcnt, ngtcp2_vec *src, + size_t *psrccnt, size_t left, size_t maxcnt) { + size_t orig_left = left; + size_t i; + ngtcp2_vec *a, *b; + + assert(maxcnt); + + if (*pdstcnt == 0) { + if (*psrccnt == 0) { + return 0; + } + + a = &dst[0]; + b = &src[0]; + + if (left >= b->len) { + *a = *b; + ++*pdstcnt; + left -= b->len; + i = 1; + } else { + a->len = left; + a->base = b->base; + + b->len -= left; + b->base += left; + + return left; + } + } else { + i = 0; + } + + for (; left && i < *psrccnt; ++i) { + a = &dst[*pdstcnt - 1]; + b = &src[i]; + + if (left >= b->len) { + if (a->base + a->len == b->base) { + a->len += b->len; + } else if (*pdstcnt == maxcnt) { + break; + } else { + dst[(*pdstcnt)++] = *b; + } + left -= b->len; + continue; + } + + if (a->base + a->len == b->base) { + a->len += left; + } else if (*pdstcnt == maxcnt) { + break; + } else { + dst[*pdstcnt].len = left; + dst[*pdstcnt].base = b->base; + ++*pdstcnt; + } + + b->len -= left; + b->base += left; + left = 0; + + break; + } + + memmove(src, src + i, sizeof(ngtcp2_vec) * (*psrccnt - i)); + *psrccnt -= i; + + return orig_left - left; +} + +size_t ngtcp2_vec_copy_at_most(ngtcp2_vec *dst, size_t dstcnt, + const ngtcp2_vec *src, size_t srccnt, + size_t left) { + size_t i, j; + + for (i = 0, j = 0; left > 0 && i < srccnt && j < dstcnt;) { + if (src[i].len == 0) { + ++i; + continue; + } + dst[j] = src[i]; + if (dst[j].len > left) { + dst[j].len = left; + return j + 1; + } + left -= dst[j].len; + ++i; + ++j; + } + + return j; +} + +void ngtcp2_vec_copy(ngtcp2_vec *dst, const ngtcp2_vec *src, size_t cnt) { + memcpy(dst, src, sizeof(ngtcp2_vec) * cnt); +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_vec.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_vec.h new file mode 100644 index 0000000..a39c439 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_vec.h @@ -0,0 +1,120 @@ +/* + * ngtcp2 + * + * Copyright (c) 2018 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_VEC_H +#define NGTCP2_VEC_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +#include "ngtcp2_mem.h" + +/* + * ngtcp2_vec_lit is a convenient macro to fill the object pointed by + * |DEST| with the literal string |LIT|. + */ +#define ngtcp2_vec_lit(DEST, LIT) \ + ((DEST)->base = (uint8_t *)(LIT), (DEST)->len = sizeof(LIT) - 1, (DEST)) + +/* + * ngtcp2_vec_init initializes |vec| with the given parameters. It + * returns |vec|. + */ +ngtcp2_vec *ngtcp2_vec_init(ngtcp2_vec *vec, const uint8_t *base, size_t len); + +/* + * ngtcp2_vec_new allocates and initializes |*pvec| with given |data| + * of length |datalen|. This function allocates memory for |*pvec| + * and the given data with a single allocation, and the contents + * pointed by |data| is copied into the allocated memory space. To + * free the allocated memory, call ngtcp2_vec_del. + */ +int ngtcp2_vec_new(ngtcp2_vec **pvec, const uint8_t *data, size_t datalen, + const ngtcp2_mem *mem); + +/* + * ngtcp2_vec_del frees the memory allocated by |vec| which is + * allocated and initialized by ngtcp2_vec_new. + */ +void ngtcp2_vec_del(ngtcp2_vec *vec, const ngtcp2_mem *mem); + +/* + * ngtcp2_vec_len returns the sum of length in |vec| of |n| elements. + */ +uint64_t ngtcp2_vec_len(const ngtcp2_vec *vec, size_t n); + +/* + * ngtcp2_vec_len_varint is similar to ngtcp2_vec_len, but it returns + * -1 if the sum of the length exceeds NGTCP2_MAX_VARINT. + */ +int64_t ngtcp2_vec_len_varint(const ngtcp2_vec *vec, size_t n); + +/* + * ngtcp2_vec_split splits |src| to |dst| so that the sum of the + * length in |src| does not exceed |left| bytes. The |maxcnt| is the + * maximum number of elements which |dst| array can contain. The + * caller must set |*psrccnt| to the number of elements of |src|. + * Similarly, the caller must set |*pdstcnt| to the number of elements + * of |dst|. The split does not necessarily occur at the boundary of + * ngtcp2_vec object. After split has done, this function updates + * |*psrccnt| and |*pdstcnt|. This function returns the number of + * bytes moved from |src| to |dst|. If split cannot be made because + * doing so exceeds |maxcnt|, this function returns -1. + */ +ngtcp2_ssize ngtcp2_vec_split(ngtcp2_vec *src, size_t *psrccnt, ngtcp2_vec *dst, + size_t *pdstcnt, size_t left, size_t maxcnt); + +/* + * ngtcp2_vec_merge merges |src| into |dst| by moving at most |left| + * bytes from |src|. The |maxcnt| is the maximum number of elements + * which |dst| array can contain. The caller must set |*pdstcnt| to + * the number of elements of |dst|. Similarly, the caller must set + * |*psrccnt| to the number of elements of |src|. After merge has + * done, this function updates |*psrccnt| and |*pdstcnt|. This + * function returns the number of bytes moved from |src| to |dst|. + */ +size_t ngtcp2_vec_merge(ngtcp2_vec *dst, size_t *pdstcnt, ngtcp2_vec *src, + size_t *psrccnt, size_t left, size_t maxcnt); + +/* + * ngtcp2_vec_copy_at_most copies |src| of length |srccnt| to |dst| of + * length |dstcnt|. The total number of bytes which the copied + * ngtcp2_vec refers to is at most |left|. The empty elements in + * |src| are ignored. This function returns the number of elements + * copied. + */ +size_t ngtcp2_vec_copy_at_most(ngtcp2_vec *dst, size_t dstcnt, + const ngtcp2_vec *src, size_t srccnt, + size_t left); + +/* + * ngtcp2_vec_copy copies |src| of length |cnt| to |dst|. |dst| must + * have sufficient capacity. + */ +void ngtcp2_vec_copy(ngtcp2_vec *dst, const ngtcp2_vec *src, size_t cnt); + +#endif /* NGTCP2_VEC_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_version.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_version.c new file mode 100644 index 0000000..b31162c --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_version.c @@ -0,0 +1,39 @@ +/* + * ngtcp2 + * + * Copyright (c) 2019 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +static ngtcp2_info version = {NGTCP2_VERSION_AGE, NGTCP2_VERSION_NUM, + NGTCP2_VERSION}; + +const ngtcp2_info *ngtcp2_version(int least_version) { + if (least_version > NGTCP2_VERSION_NUM) { + return NULL; + } + return &version; +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_window_filter.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_window_filter.c new file mode 100644 index 0000000..71c816e --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_window_filter.c @@ -0,0 +1,99 @@ +/* + * ngtcp2 + * + * Copyright (c) 2021 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Translated to C from the original C++ code + * https://quiche.googlesource.com/quiche/+/5be974e29f7e71a196e726d6e2272676d33ab77d/quic/core/congestion_control/windowed_filter.h + * with the following license: + * + * // Copyright (c) 2016 The Chromium Authors. All rights reserved. + * // Use of this source code is governed by a BSD-style license that can be + * // found in the LICENSE file. + */ +#include "ngtcp2_window_filter.h" + +#include <string.h> + +void ngtcp2_window_filter_init(ngtcp2_window_filter *wf, + uint64_t window_length) { + wf->window_length = window_length; + memset(wf->estimates, 0, sizeof(wf->estimates)); +} + +void ngtcp2_window_filter_update(ngtcp2_window_filter *wf, uint64_t new_sample, + uint64_t new_time) { + if (wf->estimates[0].sample == 0 || new_sample > wf->estimates[0].sample || + new_time - wf->estimates[2].time > wf->window_length) { + ngtcp2_window_filter_reset(wf, new_sample, new_time); + return; + } + + if (new_sample > wf->estimates[1].sample) { + wf->estimates[1].sample = new_sample; + wf->estimates[1].time = new_time; + wf->estimates[2] = wf->estimates[1]; + } else if (new_sample > wf->estimates[2].sample) { + wf->estimates[2].sample = new_sample; + wf->estimates[2].time = new_time; + } + + if (new_time - wf->estimates[0].time > wf->window_length) { + wf->estimates[0] = wf->estimates[1]; + wf->estimates[1] = wf->estimates[2]; + wf->estimates[2].sample = new_sample; + wf->estimates[2].time = new_time; + + if (new_time - wf->estimates[0].time > wf->window_length) { + wf->estimates[0] = wf->estimates[1]; + wf->estimates[1] = wf->estimates[2]; + } + return; + } + + if (wf->estimates[1].sample == wf->estimates[0].sample && + new_time - wf->estimates[1].time > wf->window_length >> 2) { + wf->estimates[2].sample = new_sample; + wf->estimates[2].time = new_time; + wf->estimates[1] = wf->estimates[2]; + return; + } + + if (wf->estimates[2].sample == wf->estimates[1].sample && + new_time - wf->estimates[2].time > wf->window_length >> 1) { + wf->estimates[2].sample = new_sample; + wf->estimates[2].time = new_time; + } +} + +void ngtcp2_window_filter_reset(ngtcp2_window_filter *wf, uint64_t new_sample, + uint64_t new_time) { + wf->estimates[0].sample = new_sample; + wf->estimates[0].time = new_time; + wf->estimates[1] = wf->estimates[2] = wf->estimates[0]; +} + +uint64_t ngtcp2_window_filter_get_best(ngtcp2_window_filter *wf) { + return wf->estimates[0].sample; +} diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_window_filter.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_window_filter.h new file mode 100644 index 0000000..50415f1 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_window_filter.h @@ -0,0 +1,65 @@ +/* + * ngtcp2 + * + * Copyright (c) 2021 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Translated to C from the original C++ code + * https://quiche.googlesource.com/quiche/+/5be974e29f7e71a196e726d6e2272676d33ab77d/quic/core/congestion_control/windowed_filter.h + * with the following license: + * + * // Copyright (c) 2016 The Chromium Authors. All rights reserved. + * // Use of this source code is governed by a BSD-style license that can be + * // found in the LICENSE file. + */ +#ifndef NGTCP2_WINDOW_FILTER_H +#define NGTCP2_WINDOW_FILTER_H + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <ngtcp2/ngtcp2.h> + +typedef struct ngtcp2_window_filter_sample { + uint64_t sample; + uint64_t time; +} ngtcp2_window_filter_sample; + +typedef struct ngtcp2_window_filter { + uint64_t window_length; + ngtcp2_window_filter_sample estimates[3]; +} ngtcp2_window_filter; + +void ngtcp2_window_filter_init(ngtcp2_window_filter *wf, + uint64_t window_length); + +void ngtcp2_window_filter_update(ngtcp2_window_filter *wf, uint64_t new_sample, + uint64_t new_time); + +void ngtcp2_window_filter_reset(ngtcp2_window_filter *wf, uint64_t new_sample, + uint64_t new_time); + +uint64_t ngtcp2_window_filter_get_best(ngtcp2_window_filter *wf); + +#endif /* NGTCP2_WINDOW_FILTER_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/ngtcp2.h b/src/contrib/libngtcp2/ngtcp2/ngtcp2.h new file mode 100644 index 0000000..d4ba09d --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/ngtcp2.h @@ -0,0 +1,5906 @@ +/* + * ngtcp2 + * + * Copyright (c) 2017 ngtcp2 contributors + * Copyright (c) 2017 nghttp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_H +#define NGTCP2_H + +/* Define WIN32 when build target is Win32 API (borrowed from + libcurl) */ +#if (defined(_WIN32) || defined(__WIN32__)) && !defined(WIN32) +# define WIN32 +#endif + +#ifdef _MSC_VER +# pragma warning(push) +# pragma warning(disable : 4324) +#endif + +#include <stdlib.h> +#if defined(_MSC_VER) && (_MSC_VER < 1800) +/* MSVC < 2013 does not have inttypes.h because it is not C99 + compliant. See compiler macros and version number in + https://sourceforge.net/p/predef/wiki/Compilers/ */ +# include <stdint.h> +#else /* !defined(_MSC_VER) || (_MSC_VER >= 1800) */ +# include <inttypes.h> +#endif /* !defined(_MSC_VER) || (_MSC_VER >= 1800) */ +#include <sys/types.h> +#include <stdarg.h> +#include <stddef.h> + +#ifndef NGTCP2_USE_GENERIC_SOCKADDR +# ifdef WIN32 +# ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +# endif +# include <ws2tcpip.h> +# else +# include <sys/socket.h> +# include <netinet/in.h> +# endif +#endif + +#ifdef AF_INET +# define NGTCP2_AF_INET AF_INET +#else +# define NGTCP2_AF_INET 2 +#endif + +#ifdef AF_INET6 +# define NGTCP2_AF_INET6 AF_INET6 +#else +# define NGTCP2_AF_INET6 23 +# define NGTCP2_USE_GENERIC_IPV6_SOCKADDR +#endif + +#include <ngtcp2/version.h> + +#ifdef NGTCP2_STATICLIB +# define NGTCP2_EXTERN +#elif defined(WIN32) +# ifdef BUILDING_NGTCP2 +# define NGTCP2_EXTERN __declspec(dllexport) +# else /* !BUILDING_NGTCP2 */ +# define NGTCP2_EXTERN __declspec(dllimport) +# endif /* !BUILDING_NGTCP2 */ +#else /* !defined(WIN32) */ +# ifdef BUILDING_NGTCP2 +# define NGTCP2_EXTERN __attribute__((visibility("default"))) +# else /* !BUILDING_NGTCP2 */ +# define NGTCP2_EXTERN +# endif /* !BUILDING_NGTCP2 */ +#endif /* !defined(WIN32) */ + +#ifdef _MSC_VER +# define NGTCP2_ALIGN(N) __declspec(align(N)) +#else /* !_MSC_VER */ +# define NGTCP2_ALIGN(N) __attribute__((aligned(N))) +#endif /* !_MSC_VER */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @typedef + * + * :type:`ngtcp2_ssize` is signed counterpart of size_t. + */ +typedef ptrdiff_t ngtcp2_ssize; + +/** + * @functypedef + * + * :type:`ngtcp2_malloc` is a custom memory allocator to replace + * :manpage:`malloc(3)`. The |user_data| is + * :member:`ngtcp2_mem.user_data`. + */ +typedef void *(*ngtcp2_malloc)(size_t size, void *user_data); + +/** + * @functypedef + * + * :type:`ngtcp2_free` is a custom memory allocator to replace + * :manpage:`free(3)`. The |user_data| is + * :member:`ngtcp2_mem.user_data`. + */ +typedef void (*ngtcp2_free)(void *ptr, void *user_data); + +/** + * @functypedef + * + * :type:`ngtcp2_calloc` is a custom memory allocator to replace + * :manpage:`calloc(3)`. The |user_data| is the + * :member:`ngtcp2_mem.user_data`. + */ +typedef void *(*ngtcp2_calloc)(size_t nmemb, size_t size, void *user_data); + +/** + * @functypedef + * + * :type:`ngtcp2_realloc` is a custom memory allocator to replace + * :manpage:`realloc(3)`. The |user_data| is the + * :member:`ngtcp2_mem.user_data`. + */ +typedef void *(*ngtcp2_realloc)(void *ptr, size_t size, void *user_data); + +/** + * @struct + * + * :type:`ngtcp2_mem` is a custom memory allocator. The + * :member:`user_data` field is passed to each allocator function. + * This can be used, for example, to achieve per-connection memory + * pool. + * + * In the following example code, ``my_malloc``, ``my_free``, + * ``my_calloc`` and ``my_realloc`` are the replacement of the + * standard allocators :manpage:`malloc(3)`, :manpage:`free(3)`, + * :manpage:`calloc(3)` and :manpage:`realloc(3)` respectively:: + * + * void *my_malloc_cb(size_t size, void *user_data) { + * (void)user_data; + * return my_malloc(size); + * } + * + * void my_free_cb(void *ptr, void *user_data) { + * (void)user_data; + * my_free(ptr); + * } + * + * void *my_calloc_cb(size_t nmemb, size_t size, void *user_data) { + * (void)user_data; + * return my_calloc(nmemb, size); + * } + * + * void *my_realloc_cb(void *ptr, size_t size, void *user_data) { + * (void)user_data; + * return my_realloc(ptr, size); + * } + * + * void conn_new() { + * ngtcp2_mem mem = {NULL, my_malloc_cb, my_free_cb, my_calloc_cb, + * my_realloc_cb}; + * + * ... + * } + */ +typedef struct ngtcp2_mem { + /** + * :member:`user_data` is an arbitrary user supplied data. This + * is passed to each allocator function. + */ + void *user_data; + /** + * :member:`malloc` is a custom allocator function to replace + * :manpage:`malloc(3)`. + */ + ngtcp2_malloc malloc; + /** + * :member:`free` is a custom allocator function to replace + * :manpage:`free(3)`. + */ + ngtcp2_free free; + /** + * :member:`calloc` is a custom allocator function to replace + * :manpage:`calloc(3)`. + */ + ngtcp2_calloc calloc; + /** + * :member:`realloc` is a custom allocator function to replace + * :manpage:`realloc(3)`. + */ + ngtcp2_realloc realloc; +} ngtcp2_mem; + +/** + * @macrosection + * + * Time related macros + */ + +/** + * @macro + * + * :macro:`NGTCP2_SECONDS` is a count of tick which corresponds to 1 second. + */ +#define NGTCP2_SECONDS ((ngtcp2_duration)1000000000ULL) + +/** + * @macro + * + * :macro:`NGTCP2_MILLISECONDS` is a count of tick which corresponds + * to 1 millisecond. + */ +#define NGTCP2_MILLISECONDS ((ngtcp2_duration)1000000ULL) + +/** + * @macro + * + * :macro:`NGTCP2_MICROSECONDS` is a count of tick which corresponds + * to 1 microsecond. + */ +#define NGTCP2_MICROSECONDS ((ngtcp2_duration)1000ULL) + +/** + * @macro + * + * :macro:`NGTCP2_NANOSECONDS` is a count of tick which corresponds to + * 1 nanosecond. + */ +#define NGTCP2_NANOSECONDS ((ngtcp2_duration)1ULL) + +/** + * @macrosection + * + * QUIC protocol version macros + */ + +/** + * @macro + * + * :macro:`NGTCP2_PROTO_VER_V1` is the QUIC version 1. + */ +#define NGTCP2_PROTO_VER_V1 ((uint32_t)0x00000001u) + +/** + * @macro + * + * :macro:`NGTCP2_PROTO_VER_V2` is the QUIC version 2. + * + * https://quicwg.org/quic-v2/draft-ietf-quic-v2.html + */ +#define NGTCP2_PROTO_VER_V2 ((uint32_t)0x6b3343cfu) + +/** + * @macro + * + * :macro:`NGTCP2_PROTO_VER_DRAFT_MAX` is the maximum QUIC draft + * version that this library supports. + */ +#define NGTCP2_PROTO_VER_DRAFT_MAX 0xff000020u + +/** + * @macro + * + * :macro:`NGTCP2_PROTO_VER_DRAFT_MIN` is the minimum QUIC draft + * version that this library supports. + */ +#define NGTCP2_PROTO_VER_DRAFT_MIN 0xff00001du + +/** + * @macro + * + * :macro:`NGTCP2_PROTO_VER_MAX` is the highest QUIC version that this + * library supports. + */ +#define NGTCP2_PROTO_VER_MAX NGTCP2_PROTO_VER_V1 + +/** + * @macro + * + * :macro:`NGTCP2_PROTO_VER_MIN` is the lowest QUIC version that this + * library supports. + */ +#define NGTCP2_PROTO_VER_MIN NGTCP2_PROTO_VER_DRAFT_MIN + +/** + * @macro + * + * :macro:`NGTCP2_RESERVED_VERSION_MASK` is the bit mask of reserved + * version. + */ +#define NGTCP2_RESERVED_VERSION_MASK 0x0a0a0a0au + +/** + * @macrosection + * + * UDP datagram related macros + */ + +/** + * @macro + * + * :macro:`NGTCP2_MAX_UDP_PAYLOAD_SIZE` is the default maximum UDP + * datagram payload size that this endpoint transmits. + */ +#define NGTCP2_MAX_UDP_PAYLOAD_SIZE 1200 + +/** + * @macro + * + * :macro:`NGTCP2_MAX_PMTUD_UDP_PAYLOAD_SIZE` is the maximum UDP + * datagram payload size that Path MTU Discovery can discover. + */ +#define NGTCP2_MAX_PMTUD_UDP_PAYLOAD_SIZE 1452 + +/** + * @macrosection + * + * QUIC specific macros + */ + +/** + * @macro + * + * :macro:`NGTCP2_MAX_VARINT` is the maximum value which can be + * encoded in variable-length integer encoding. + */ +#define NGTCP2_MAX_VARINT ((1ULL << 62) - 1) + +/** + * @macro + * + * :macro:`NGTCP2_STATELESS_RESET_TOKENLEN` is the length of Stateless + * Reset Token. + */ +#define NGTCP2_STATELESS_RESET_TOKENLEN 16 + +/** + * @macro + * + * :macro:`NGTCP2_MIN_STATELESS_RESET_RANDLEN` is the minimum length + * of random bytes (Unpredictable Bits) in Stateless Reset packet + */ +#define NGTCP2_MIN_STATELESS_RESET_RANDLEN 5 + +/** + * @macro + * + * :macro:`NGTCP2_PATH_CHALLENGE_DATALEN` is the length of + * PATH_CHALLENGE data. + */ +#define NGTCP2_PATH_CHALLENGE_DATALEN 8 + +/** + * @macro + * + * :macro:`NGTCP2_RETRY_KEY_DRAFT` is an encryption key to create + * integrity tag of Retry packet. It is used for QUIC draft versions. + */ +#define NGTCP2_RETRY_KEY_DRAFT \ + "\xcc\xce\x18\x7e\xd0\x9a\x09\xd0\x57\x28\x15\x5a\x6c\xb9\x6b\xe1" + +/** + * @macro + * + * :macro:`NGTCP2_RETRY_NONCE_DRAFT` is nonce used when generating + * integrity tag of Retry packet. It is used for QUIC draft versions. + */ +#define NGTCP2_RETRY_NONCE_DRAFT \ + "\xe5\x49\x30\xf9\x7f\x21\x36\xf0\x53\x0a\x8c\x1c" + +/** + * @macro + * + * :macro:`NGTCP2_RETRY_KEY_V1` is an encryption key to create + * integrity tag of Retry packet. It is used for QUIC v1. + */ +#define NGTCP2_RETRY_KEY_V1 \ + "\xbe\x0c\x69\x0b\x9f\x66\x57\x5a\x1d\x76\x6b\x54\xe3\x68\xc8\x4e" + +/** + * @macro + * + * :macro:`NGTCP2_RETRY_NONCE_V1` is nonce used when generating integrity + * tag of Retry packet. It is used for QUIC v1. + */ +#define NGTCP2_RETRY_NONCE_V1 "\x46\x15\x99\xd3\x5d\x63\x2b\xf2\x23\x98\x25\xbb" + +/** + * @macro + * + * :macro:`NGTCP2_RETRY_KEY_V2` is an encryption key to create + * integrity tag of Retry packet. It is used for QUIC v2. + * + * https://quicwg.org/quic-v2/draft-ietf-quic-v2.html + */ +#define NGTCP2_RETRY_KEY_V2 \ + "\x8f\xb4\xb0\x1b\x56\xac\x48\xe2\x60\xfb\xcb\xce\xad\x7c\xcc\x92" + +/** + * @macro + * + * :macro:`NGTCP2_RETRY_NONCE_V2` is nonce used when generating + * integrity tag of Retry packet. It is used for QUIC v2. + * + * https://quicwg.org/quic-v2/draft-ietf-quic-v2.html + */ +#define NGTCP2_RETRY_NONCE_V2 "\xd8\x69\x69\xbc\x2d\x7c\x6d\x99\x90\xef\xb0\x4a" + +/** + * @macro + * + * :macro:`NGTCP2_HP_MASKLEN` is the length of header protection mask. + */ +#define NGTCP2_HP_MASKLEN 5 + +/** + * @macro + * + * :macro:`NGTCP2_HP_SAMPLELEN` is the number bytes sampled when + * encrypting a packet header. + */ +#define NGTCP2_HP_SAMPLELEN 16 + +/** + * @macro + * + * :macro:`NGTCP2_DEFAULT_INITIAL_RTT` is a default initial RTT. + */ +#define NGTCP2_DEFAULT_INITIAL_RTT (333 * NGTCP2_MILLISECONDS) + +/** + * @macro + * + * :macro:`NGTCP2_MAX_CIDLEN` is the maximum length of Connection ID. + */ +#define NGTCP2_MAX_CIDLEN 20 + +/** + * @macro + * + * :macro:`NGTCP2_MIN_CIDLEN` is the minimum length of Connection ID. + */ +#define NGTCP2_MIN_CIDLEN 1 + +/** + * @macro + * + * :macro:`NGTCP2_MIN_INITIAL_DCIDLEN` is the minimum length of + * Destination Connection ID in Client Initial packet if it does not + * bear token from Retry packet. + */ +#define NGTCP2_MIN_INITIAL_DCIDLEN 8 + +/** + * @macro + * + * :macro:`NGTCP2_DEFAULT_HANDSHAKE_TIMEOUT` is the default handshake + * timeout. + */ +#define NGTCP2_DEFAULT_HANDSHAKE_TIMEOUT (10 * NGTCP2_SECONDS) + +/** + * @macrosection + * + * ECN related macros + */ + +/** + * @macro + * + * :macro:`NGTCP2_ECN_NOT_ECT` indicates no ECN marking. + */ +#define NGTCP2_ECN_NOT_ECT 0x0 + +/** + * @macro + * + * :macro:`NGTCP2_ECN_ECT_1` is ECT(1) codepoint. + */ +#define NGTCP2_ECN_ECT_1 0x1 + +/** + * @macro + * + * :macro:`NGTCP2_ECN_ECT_0` is ECT(0) codepoint. + */ +#define NGTCP2_ECN_ECT_0 0x2 + +/** + * @macro + * + * :macro:`NGTCP2_ECN_CE` is CE codepoint. + */ +#define NGTCP2_ECN_CE 0x3 + +/** + * @macro + * + * :macro:`NGTCP2_ECN_MASK` is a bit mask to get ECN marking. + */ +#define NGTCP2_ECN_MASK 0x3 + +#define NGTCP2_PKT_INFO_VERSION_V1 1 +#define NGTCP2_PKT_INFO_VERSION NGTCP2_PKT_INFO_VERSION_V1 + +/** + * @struct + * + * :type:`ngtcp2_pkt_info` is a packet metadata. + */ +typedef struct NGTCP2_ALIGN(8) ngtcp2_pkt_info { + /** + * :member:`ecn` is ECN marking and when passing + * `ngtcp2_conn_read_pkt()`, and it should be either + * :macro:`NGTCP2_ECN_NOT_ECT`, :macro:`NGTCP2_ECN_ECT_1`, + * :macro:`NGTCP2_ECN_ECT_0`, or :macro:`NGTCP2_ECN_CE`. + */ + uint32_t ecn; +} ngtcp2_pkt_info; + +/** + * @macrosection + * + * ngtcp2 library error codes + */ + +/** + * @macro + * + * :macro:`NGTCP2_ERR_INVALID_ARGUMENT` indicates that a passed + * argument is invalid. + */ +#define NGTCP2_ERR_INVALID_ARGUMENT -201 +/** + * @macro + * + * :macro:`NGTCP2_ERR_NOBUF` indicates that a provided buffer does not + * have enough space to store data. + */ +#define NGTCP2_ERR_NOBUF -203 +/** + * @macro + * + * :macro:`NGTCP2_ERR_PROTO` indicates a general protocol error. + */ +#define NGTCP2_ERR_PROTO -205 +/** + * @macro + * + * :macro:`NGTCP2_ERR_INVALID_STATE` indicates that a requested + * operation is not allowed at the current connection state. + */ +#define NGTCP2_ERR_INVALID_STATE -206 +/** + * @macro + * + * :macro:`NGTCP2_ERR_ACK_FRAME` indicates that an invalid ACK frame + * is received. + */ +#define NGTCP2_ERR_ACK_FRAME -207 +/** + * @macro + * + * :macro:`NGTCP2_ERR_STREAM_ID_BLOCKED` indicates that there is no + * spare stream ID available. + */ +#define NGTCP2_ERR_STREAM_ID_BLOCKED -208 +/** + * @macro + * + * :macro:`NGTCP2_ERR_STREAM_IN_USE` indicates that a stream ID is + * already in use. + */ +#define NGTCP2_ERR_STREAM_IN_USE -209 +/** + * @macro + * + * :macro:`NGTCP2_ERR_STREAM_DATA_BLOCKED` indicates that stream data + * cannot be sent because of flow control. + */ +#define NGTCP2_ERR_STREAM_DATA_BLOCKED -210 +/** + * @macro + * + * :macro:`NGTCP2_ERR_FLOW_CONTROL` indicates flow control error. + */ +#define NGTCP2_ERR_FLOW_CONTROL -211 +/** + * @macro + * + * :macro:`NGTCP2_ERR_CONNECTION_ID_LIMIT` indicates that the number + * of received Connection ID exceeds acceptable limit. + */ +#define NGTCP2_ERR_CONNECTION_ID_LIMIT -212 +/** + * @macro + * + * :macro:`NGTCP2_ERR_STREAM_LIMIT` indicates that a remote endpoint + * opens more streams that is permitted. + */ +#define NGTCP2_ERR_STREAM_LIMIT -213 +/** + * @macro + * + * :macro:`NGTCP2_ERR_FINAL_SIZE` indicates that inconsistent final + * size of a stream. + */ +#define NGTCP2_ERR_FINAL_SIZE -214 +/** + * @macro + * + * :macro:`NGTCP2_ERR_CRYPTO` indicates crypto (TLS) related error. + */ +#define NGTCP2_ERR_CRYPTO -215 +/** + * @macro + * + * :macro:`NGTCP2_ERR_PKT_NUM_EXHAUSTED` indicates that packet number + * is exhausted. + */ +#define NGTCP2_ERR_PKT_NUM_EXHAUSTED -216 +/** + * @macro + * + * :macro:`NGTCP2_ERR_REQUIRED_TRANSPORT_PARAM` indicates that a + * required transport parameter is missing. + */ +#define NGTCP2_ERR_REQUIRED_TRANSPORT_PARAM -217 +/** + * @macro + * + * :macro:`NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM` indicates that a + * transport parameter is malformed. + */ +#define NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM -218 +/** + * @macro + * + * :macro:`NGTCP2_ERR_FRAME_ENCODING` indicates there is an error in + * frame encoding. + */ +#define NGTCP2_ERR_FRAME_ENCODING -219 +/** + * @macro + * + * :macro:`NGTCP2_ERR_DECRYPT` indicates a decryption failure. + */ +#define NGTCP2_ERR_DECRYPT -220 +/** + * @macro + * + * :macro:`NGTCP2_ERR_STREAM_SHUT_WR` indicates no more data can be + * sent to a stream. + */ +#define NGTCP2_ERR_STREAM_SHUT_WR -221 +/** + * @macro + * + * :macro:`NGTCP2_ERR_STREAM_NOT_FOUND` indicates that a stream was not + * found. + */ +#define NGTCP2_ERR_STREAM_NOT_FOUND -222 +/** + * @macro + * + * :macro:`NGTCP2_ERR_STREAM_STATE` indicates that a requested + * operation is not allowed at the current stream state. + */ +#define NGTCP2_ERR_STREAM_STATE -226 +/** + * @macro + * + * :macro:`NGTCP2_ERR_RECV_VERSION_NEGOTIATION` indicates that Version + * Negotiation packet was received. + */ +#define NGTCP2_ERR_RECV_VERSION_NEGOTIATION -229 +/** + * @macro + * + * :macro:`NGTCP2_ERR_CLOSING` indicates that connection is in closing + * state. + */ +#define NGTCP2_ERR_CLOSING -230 +/** + * @macro + * + * :macro:`NGTCP2_ERR_DRAINING` indicates that connection is in + * draining state. + */ +#define NGTCP2_ERR_DRAINING -231 +/** + * @macro + * + * :macro:`NGTCP2_ERR_TRANSPORT_PARAM` indicates a general transport + * parameter error. + */ +#define NGTCP2_ERR_TRANSPORT_PARAM -234 +/** + * @macro + * + * :macro:`NGTCP2_ERR_DISCARD_PKT` indicates a packet was discarded. + */ +#define NGTCP2_ERR_DISCARD_PKT -235 +/** + * @macro + * + * :macro:`NGTCP2_ERR_CONN_ID_BLOCKED` indicates that there is no + * spare Connection ID available. + */ +#define NGTCP2_ERR_CONN_ID_BLOCKED -237 +/** + * @macro + * + * :macro:`NGTCP2_ERR_INTERNAL` indicates an internal error. + */ +#define NGTCP2_ERR_INTERNAL -238 +/** + * @macro + * + * :macro:`NGTCP2_ERR_CRYPTO_BUFFER_EXCEEDED` indicates that a crypto + * buffer exceeded. + */ +#define NGTCP2_ERR_CRYPTO_BUFFER_EXCEEDED -239 +/** + * @macro + * + * :macro:`NGTCP2_ERR_WRITE_MORE` indicates + * :macro:`NGTCP2_WRITE_STREAM_FLAG_MORE` is used and a function call + * succeeded. + */ +#define NGTCP2_ERR_WRITE_MORE -240 +/** + * @macro + * + * :macro:`NGTCP2_ERR_RETRY` indicates that server should send Retry + * packet. + */ +#define NGTCP2_ERR_RETRY -241 +/** + * @macro + * + * :macro:`NGTCP2_ERR_DROP_CONN` indicates that an endpoint should + * drop connection immediately. + */ +#define NGTCP2_ERR_DROP_CONN -242 +/** + * @macro + * + * :macro:`NGTCP2_ERR_AEAD_LIMIT_REACHED` indicates AEAD encryption + * limit is reached and key update is not available. An endpoint + * should drop connection immediately. + */ +#define NGTCP2_ERR_AEAD_LIMIT_REACHED -243 +/** + * @macro + * + * :macro:`NGTCP2_ERR_NO_VIABLE_PATH` indicates that path validation + * could not probe that a path is capable of sending UDP datagram + * payload of size at least 1200 bytes. + */ +#define NGTCP2_ERR_NO_VIABLE_PATH -244 +/** + * @macro + * + * :macro:`NGTCP2_ERR_VERSION_NEGOTIATION` indicates that server + * should send Version Negotiation packet. + */ +#define NGTCP2_ERR_VERSION_NEGOTIATION -245 +/** + * @macro + * + * :macro:`NGTCP2_ERR_HANDSHAKE_TIMEOUT` indicates that QUIC + * connection is not established before the specified deadline. + */ +#define NGTCP2_ERR_HANDSHAKE_TIMEOUT -246 +/** + * @macro + * + * :macro:`NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE` indicates the + * version negotiation failed. + */ +#define NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE -247 +/** + * @macro + * + * :macro:`NGTCP2_ERR_IDLE_CLOSE` indicates the connection should be + * closed silently because of idle timeout. + */ +#define NGTCP2_ERR_IDLE_CLOSE -248 +/** + * @macro + * + * :macro:`NGTCP2_ERR_FATAL` indicates that error codes less than this + * value is fatal error. When this error is returned, an endpoint + * should drop connection immediately. + */ +#define NGTCP2_ERR_FATAL -500 +/** + * @macro + * + * :macro:`NGTCP2_ERR_NOMEM` indicates out of memory. + */ +#define NGTCP2_ERR_NOMEM -501 +/** + * @macro + * + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` indicates that user defined + * callback function failed. + */ +#define NGTCP2_ERR_CALLBACK_FAILURE -502 + +/** + * @macrosection + * + * QUIC packet header flags + */ + +/** + * @macro + * + * :macro:`NGTCP2_PKT_FLAG_NONE` indicates no flag set. + */ +#define NGTCP2_PKT_FLAG_NONE 0x00u + +/** + * @macro + * + * :macro:`NGTCP2_PKT_FLAG_LONG_FORM` indicates the Long header packet + * header. + */ +#define NGTCP2_PKT_FLAG_LONG_FORM 0x01u + +/** + * @macro + * + * :macro:`NGTCP2_PKT_FLAG_FIXED_BIT_CLEAR` indicates that Fixed Bit + * (aka QUIC bit) is not set. + */ +#define NGTCP2_PKT_FLAG_FIXED_BIT_CLEAR 0x02u + +/** + * @macro + * + * :macro:`NGTCP2_PKT_FLAG_KEY_PHASE` indicates Key Phase bit set. + */ +#define NGTCP2_PKT_FLAG_KEY_PHASE 0x04u + +/** + * @enum + * + * :type:`ngtcp2_pkt_type` defines QUIC version-independent QUIC + * packet types. + */ +typedef enum ngtcp2_pkt_type { + /** + * :enum:`NGTCP2_PKT_VERSION_NEGOTIATION` is defined by libngtcp2 + * for convenience. + */ + NGTCP2_PKT_VERSION_NEGOTIATION = 0x80, + /** + * :enum:`NGTCP2_PKT_STATELESS_RESET` is defined by libngtcp2 for + * convenience. + */ + NGTCP2_PKT_STATELESS_RESET = 0x81, + /** + * :enum:`NGTCP2_PKT_INITIAL` indicates Initial packet. + */ + NGTCP2_PKT_INITIAL = 0x10, + /** + * :enum:`NGTCP2_PKT_0RTT` indicates 0RTT packet. + */ + NGTCP2_PKT_0RTT = 0x11, + /** + * :enum:`NGTCP2_PKT_HANDSHAKE` indicates Handshake packet. + */ + NGTCP2_PKT_HANDSHAKE = 0x12, + /** + * :enum:`NGTCP2_PKT_RETRY` indicates Retry packet. + */ + NGTCP2_PKT_RETRY = 0x13, + /** + * :enum:`NGTCP2_PKT_1RTT` is defined by libngtcp2 for convenience. + */ + NGTCP2_PKT_1RTT = 0x40 +} ngtcp2_pkt_type; + +/** + * @macrosection + * + * QUIC transport error code + */ + +/** + * @macro + * + * :macro:`NGTCP2_NO_ERROR` is QUIC transport error code ``NO_ERROR``. + */ +#define NGTCP2_NO_ERROR 0x0u + +/** + * @macro + * + * :macro:`NGTCP2_INTERNAL_ERROR` is QUIC transport error code + * ``INTERNAL_ERROR``. + */ +#define NGTCP2_INTERNAL_ERROR 0x1u + +/** + * @macro + * + * :macro:`NGTCP2_CONNECTION_REFUSED` is QUIC transport error code + * ``CONNECTION_REFUSED``. + */ +#define NGTCP2_CONNECTION_REFUSED 0x2u + +/** + * @macro + * + * :macro:`NGTCP2_FLOW_CONTROL_ERROR` is QUIC transport error code + * ``FLOW_CONTROL_ERROR``. + */ +#define NGTCP2_FLOW_CONTROL_ERROR 0x3u + +/** + * @macro + * + * :macro:`NGTCP2_STREAM_LIMIT_ERROR` is QUIC transport error code + * ``STREAM_LIMIT_ERROR``. + */ +#define NGTCP2_STREAM_LIMIT_ERROR 0x4u + +/** + * @macro + * + * :macro:`NGTCP2_STREAM_STATE_ERROR` is QUIC transport error code + * ``STREAM_STATE_ERROR``. + */ +#define NGTCP2_STREAM_STATE_ERROR 0x5u + +/** + * @macro + * + * :macro:`NGTCP2_FINAL_SIZE_ERROR` is QUIC transport error code + * ``FINAL_SIZE_ERROR``. + */ +#define NGTCP2_FINAL_SIZE_ERROR 0x6u + +/** + * @macro + * + * :macro:`NGTCP2_FRAME_ENCODING_ERROR` is QUIC transport error code + * ``FRAME_ENCODING_ERROR``. + */ +#define NGTCP2_FRAME_ENCODING_ERROR 0x7u + +/** + * @macro + * + * :macro:`NGTCP2_TRANSPORT_PARAMETER_ERROR` is QUIC transport error + * code ``TRANSPORT_PARAMETER_ERROR``. + */ +#define NGTCP2_TRANSPORT_PARAMETER_ERROR 0x8u + +/** + * @macro + * + * :macro:`NGTCP2_CONNECTION_ID_LIMIT_ERROR` is QUIC transport error + * code ``CONNECTION_ID_LIMIT_ERROR``. + */ +#define NGTCP2_CONNECTION_ID_LIMIT_ERROR 0x9u + +/** + * @macro + * + * :macro:`NGTCP2_PROTOCOL_VIOLATION` is QUIC transport error code + * ``PROTOCOL_VIOLATION``. + */ +#define NGTCP2_PROTOCOL_VIOLATION 0xau + +/** + * @macro + * + * :macro:`NGTCP2_INVALID_TOKEN` is QUIC transport error code + * ``INVALID_TOKEN``. + */ +#define NGTCP2_INVALID_TOKEN 0xbu + +/** + * @macro + * + * :macro:`NGTCP2_APPLICATION_ERROR` is QUIC transport error code + * ``APPLICATION_ERROR``. + */ +#define NGTCP2_APPLICATION_ERROR 0xcu + +/** + * @macro + * + * :macro:`NGTCP2_CRYPTO_BUFFER_EXCEEDED` is QUIC transport error code + * ``CRYPTO_BUFFER_EXCEEDED``. + */ +#define NGTCP2_CRYPTO_BUFFER_EXCEEDED 0xdu + +/** + * @macro + * + * :macro:`NGTCP2_KEY_UPDATE_ERROR` is QUIC transport error code + * ``KEY_UPDATE_ERROR``. + */ +#define NGTCP2_KEY_UPDATE_ERROR 0xeu + +/** + * @macro + * + * :macro:`NGTCP2_AEAD_LIMIT_REACHED` is QUIC transport error code + * ``AEAD_LIMIT_REACHED``. + */ +#define NGTCP2_AEAD_LIMIT_REACHED 0xfu + +/** + * @macro + * + * :macro:`NGTCP2_NO_VIABLE_PATH` is QUIC transport error code + * ``NO_VIABLE_PATH``. + */ +#define NGTCP2_NO_VIABLE_PATH 0x10u + +/** + * @macro + * + * :macro:`NGTCP2_CRYPTO_ERROR` is QUIC transport error code + * ``CRYPTO_ERROR``. + */ +#define NGTCP2_CRYPTO_ERROR 0x100u + +/** + * @macro + * + * :macro:`NGTCP2_VERSION_NEGOTIATION_ERROR` is QUIC transport error + * code ``VERSION_NEGOTIATION_ERROR``. + * + * https://datatracker.ietf.org/doc/html/draft-ietf-quic-version-negotiation-14 + */ +#define NGTCP2_VERSION_NEGOTIATION_ERROR 0x11 + +/** + * @enum + * + * :type:`ngtcp2_path_validation_result` defines path validation + * result code. + */ +typedef enum ngtcp2_path_validation_result { + /** + * :enum:`NGTCP2_PATH_VALIDATION_RESULT_SUCCESS` indicates + * successful validation. + */ + NGTCP2_PATH_VALIDATION_RESULT_SUCCESS, + /** + * :enum:`NGTCP2_PATH_VALIDATION_RESULT_FAILURE` indicates + * validation failure. + */ + NGTCP2_PATH_VALIDATION_RESULT_FAILURE, + /** + * :enum:`NGTCP2_PATH_VALIDATION_RESULT_ABORTED` indicates that path + * validation was aborted. + */ + NGTCP2_PATH_VALIDATION_RESULT_ABORTED +} ngtcp2_path_validation_result; + +/** + * @typedef + * + * :type:`ngtcp2_tstamp` is a timestamp with nanosecond resolution. + * ``UINT64_MAX`` is an invalid value. + */ +typedef uint64_t ngtcp2_tstamp; + +/** + * @typedef + * + * :type:`ngtcp2_duration` is a period of time in nanosecond + * resolution. ``UINT64_MAX`` is an invalid value. + */ +typedef uint64_t ngtcp2_duration; + +/** + * @struct + * + * :type:`ngtcp2_cid` holds a Connection ID. + */ +typedef struct ngtcp2_cid { + /** + * :member:`datalen` is the length of Connection ID. + */ + size_t datalen; + /** + * :member:`data` is the buffer to store Connection ID. + */ + uint8_t data[NGTCP2_MAX_CIDLEN]; +} ngtcp2_cid; + +/** + * @struct + * + * :type:`ngtcp2_vec` is struct iovec compatible structure to + * reference arbitrary array of bytes. + */ +typedef struct ngtcp2_vec { + /** + * :member:`base` points to the data. + */ + uint8_t *base; + /** + * :member:`len` is the number of bytes which the buffer pointed by + * base contains. + */ + size_t len; +} ngtcp2_vec; + +/** + * @function + * + * `ngtcp2_cid_init` initializes Connection ID |cid| with the byte + * string pointed by |data| and its length is |datalen|. |datalen| + * must be at most :macro:`NGTCP2_MAX_CIDLEN`. + */ +NGTCP2_EXTERN void ngtcp2_cid_init(ngtcp2_cid *cid, const uint8_t *data, + size_t datalen); + +/** + * @function + * + * `ngtcp2_cid_eq` returns nonzero if |a| and |b| share the same + * Connection ID. + */ +NGTCP2_EXTERN int ngtcp2_cid_eq(const ngtcp2_cid *a, const ngtcp2_cid *b); + +/** + * @struct + * + * :type:`ngtcp2_pkt_hd` represents QUIC packet header. + */ +typedef struct ngtcp2_pkt_hd { + /** + * :member:`dcid` is Destination Connection ID. + */ + ngtcp2_cid dcid; + /** + * :member:`scid` is Source Connection ID. + */ + ngtcp2_cid scid; + /** + * :member:`pkt_num` is a packet number. + */ + int64_t pkt_num; + /** + * :member:`token` contains token for Initial + * packet. + */ + const uint8_t *token; + /** + * :member:`tokenlen` is the length of :member:`token`. + */ + size_t tokenlen; + /** + * :member:`pkt_numlen` is the number of bytes spent to encode + * :member:`pkt_num`. + */ + size_t pkt_numlen; + /** + * :member:`len` is the sum of :member:`pkt_numlen` and the length + * of QUIC packet payload. + */ + size_t len; + /** + * :member:`version` is QUIC version. + */ + uint32_t version; + /** + * :member:`type` is a type of QUIC packet. See + * :type:`ngtcp2_pkt_type`. + */ + uint8_t type; + /** + * :member:`flags` is zero or more of :macro:`NGTCP2_PKT_FLAG_* + * <NGTCP2_PKT_FLAG_NONE>`. + */ + uint8_t flags; +} ngtcp2_pkt_hd; + +/** + * @struct + * + * :type:`ngtcp2_pkt_stateless_reset` represents Stateless Reset. + */ +typedef struct ngtcp2_pkt_stateless_reset { + /** + * :member:`stateless_reset_token` contains stateless reset token. + */ + uint8_t stateless_reset_token[NGTCP2_STATELESS_RESET_TOKENLEN]; + /** + * :member:`rand` points a buffer which contains random bytes + * section. + */ + const uint8_t *rand; + /** + * :member:`randlen` is the number of random bytes. + */ + size_t randlen; +} ngtcp2_pkt_stateless_reset; + +/** + * @enum + * + * :type:`ngtcp2_transport_params_type` defines TLS message type which + * carries transport parameters. + */ +typedef enum ngtcp2_transport_params_type { + /** + * :enum:`NGTCP2_TRANSPORT_PARAMS_TYPE_CLIENT_HELLO` is Client Hello + * TLS message. + */ + NGTCP2_TRANSPORT_PARAMS_TYPE_CLIENT_HELLO, + /** + * :enum:`NGTCP2_TRANSPORT_PARAMS_TYPE_ENCRYPTED_EXTENSIONS` is + * Encrypted Extensions TLS message. + */ + NGTCP2_TRANSPORT_PARAMS_TYPE_ENCRYPTED_EXTENSIONS +} ngtcp2_transport_params_type; + +/** + * @macrosection + * + * QUIC transport parameters related macros + */ + +/** + * @macro + * + * :macro:`NGTCP2_DEFAULT_MAX_RECV_UDP_PAYLOAD_SIZE` is the default + * value of max_udp_payload_size transport parameter. + */ +#define NGTCP2_DEFAULT_MAX_RECV_UDP_PAYLOAD_SIZE 65527 + +/** + * @macro + * + * :macro:`NGTCP2_DEFAULT_ACK_DELAY_EXPONENT` is a default value of + * scaling factor of ACK Delay field in ACK frame. + */ +#define NGTCP2_DEFAULT_ACK_DELAY_EXPONENT 3 + +/** + * @macro + * + * :macro:`NGTCP2_DEFAULT_MAX_ACK_DELAY` is a default value of the + * maximum amount of time in nanoseconds by which endpoint delays + * sending acknowledgement. + */ +#define NGTCP2_DEFAULT_MAX_ACK_DELAY (25 * NGTCP2_MILLISECONDS) + +/** + * @macro + * + * :macro:`NGTCP2_DEFAULT_ACTIVE_CONNECTION_ID_LIMIT` is the default + * value of active_connection_id_limit transport parameter value if + * omitted. + */ +#define NGTCP2_DEFAULT_ACTIVE_CONNECTION_ID_LIMIT 2 + +/** + * @macro + * + * :macro:`NGTCP2_TLSEXT_QUIC_TRANSPORT_PARAMETERS_V1` is TLS + * extension type of quic_transport_parameters. + */ +#define NGTCP2_TLSEXT_QUIC_TRANSPORT_PARAMETERS_V1 0x39u + +/** + * @macro + * + * :macro:`NGTCP2_TLSEXT_QUIC_TRANSPORT_PARAMETERS_DRAFT` is TLS + * extension type of quic_transport_parameters used during draft + * development. + */ +#define NGTCP2_TLSEXT_QUIC_TRANSPORT_PARAMETERS_DRAFT 0xffa5u + +#ifdef NGTCP2_USE_GENERIC_SOCKADDR +typedef struct ngtcp2_sockaddr { + uint16_t sa_family; + uint8_t sa_data[14]; +} ngtcp2_sockaddr; + +typedef struct ngtcp2_in_addr { + uint32_t s_addr; +} ngtcp2_in_addr; + +typedef struct ngtcp2_sockaddr_in { + uint16_t sin_family; + uint16_t sin_port; + ngtcp2_in_addr sin_addr; + uint8_t sin_zero[8]; +} ngtcp2_sockaddr_in; + +# define NGTCP2_SS_MAXSIZE 128 +# define NGTCP2_SS_ALIGNSIZE (sizeof(uint64_t)) +# define NGTCP2_SS_PAD1SIZE (NGTCP2_SS_ALIGNSIZE - sizeof(uint16_t)) +# define NGTCP2_SS_PAD2SIZE \ + (NGTCP2_SS_MAXSIZE - \ + (sizeof(uint16_t) + NGTCP2_SS_PAD1SIZE + NGTCP2_SS_ALIGNSIZE)) + +typedef struct ngtcp2_sockaddr_storage { + uint16_t ss_family; + uint8_t _ss_pad1[NGTCP2_SS_PAD1SIZE]; + uint64_t _ss_align; + uint8_t _ss_pad2[NGTCP2_SS_PAD2SIZE]; +} ngtcp2_sockaddr_storage; + +# undef NGTCP2_SS_PAD2SIZE +# undef NGTCP2_SS_PAD1SIZE +# undef NGTCP2_SS_ALIGNSIZE +# undef NGTCP2_SS_MAXSIZE + +typedef uint32_t ngtcp2_socklen; +#else +/** + * @typedef + * + * :type:`ngtcp2_sockaddr` is typedefed to struct sockaddr. If + * :macro:`NGTCP2_USE_GENERIC_SOCKADDR` is defined, it is typedefed to + * the generic struct sockaddr defined in ngtcp2.h. + */ +typedef struct sockaddr ngtcp2_sockaddr; +/** + * @typedef + * + * :type:`ngtcp2_sockaddr_storage` is typedefed to struct + * sockaddr_storage. If :macro:`NGTCP2_USE_GENERIC_SOCKADDR` is + * defined, it is typedefed to the generic struct sockaddr_storage + * defined in ngtcp2.h. + */ +typedef struct sockaddr_storage ngtcp2_sockaddr_storage; +/** + * @typedef + * + * :type:`ngtcp2_sockaddr_in` is typedefed to struct sockaddr_in. If + * :macro:`NGTCP2_USE_GENERIC_SOCKADDR` is defined, it is typedefed to + * the generic struct sockaddr_in defined in ngtcp2.h. + */ +typedef struct sockaddr_in ngtcp2_sockaddr_in; +/** + * @typedef + * + * :type:`ngtcp2_socklen` is typedefed to socklen_t. If + * :macro:`NGTCP2_USE_GENERIC_SOCKADDR` is defined, it is typedefed to + * uint32_t. + */ +typedef socklen_t ngtcp2_socklen; +#endif + +#if defined(NGTCP2_USE_GENERIC_SOCKADDR) || \ + defined(NGTCP2_USE_GENERIC_IPV6_SOCKADDR) +typedef struct ngtcp2_in6_addr { + uint8_t in6_addr[16]; +} ngtcp2_in6_addr; + +typedef struct ngtcp2_sockaddr_in6 { + uint16_t sin6_family; + uint16_t sin6_port; + uint32_t sin6_flowinfo; + ngtcp2_in6_addr sin6_addr; + uint32_t sin6_scope_id; +} ngtcp2_sockaddr_in6; +#else +/** + * @typedef + * + * :type:`ngtcp2_sockaddr_in6` is typedefed to struct sockaddr_in6. + * If :macro:`NGTCP2_USE_GENERIC_SOCKADDR` is defined, it is typedefed + * to the generic struct sockaddr_in6 defined in ngtcp2.h. + */ +typedef struct sockaddr_in6 ngtcp2_sockaddr_in6; +#endif + +/** + * @struct + * + * :type:`ngtcp2_sockaddr_union` conveniently includes all supported + * address types. + */ +typedef union ngtcp2_sockaddr_union { + ngtcp2_sockaddr sa; + ngtcp2_sockaddr_in in; + ngtcp2_sockaddr_in6 in6; +} ngtcp2_sockaddr_union; + +/** + * @struct + * + * :type:`ngtcp2_preferred_addr` represents preferred address + * structure. + */ +typedef struct ngtcp2_preferred_addr { + /** + * :member:`cid` is a Connection ID. + */ + ngtcp2_cid cid; + /** + * :member:`ipv4` contains IPv4 address and port. + */ + ngtcp2_sockaddr_in ipv4; + /** + * :member:`ipv6` contains IPv4 address and port. + */ + ngtcp2_sockaddr_in6 ipv6; + /** + * :member:`ipv4_present` indicates that :member:`ipv4` contains + * IPv4 address and port. + */ + uint8_t ipv4_present; + /** + * :member:`ipv6_present` indicates that :member:`ipv6` contains + * IPv6 address and port. + */ + uint8_t ipv6_present; + /** + * :member:`stateless_reset_token` contains stateless reset token. + */ + uint8_t stateless_reset_token[NGTCP2_STATELESS_RESET_TOKENLEN]; +} ngtcp2_preferred_addr; + +/** + * @struct + * + * :type:`ngtcp2_version_info` represents version_information + * structure. + */ +typedef struct ngtcp2_version_info { + /** + * :member:`chosen_version` is the version chosen by the sender. + */ + uint32_t chosen_version; + /** + * :member:`available_versions` points the wire image of + * available_versions field. The each version is therefore in + * network byte order. + */ + const uint8_t *available_versions; + /** + * :member:`available_versionslen` is the number of bytes pointed by + * :member:`available_versions`, not the number of versions + * included. + */ + size_t available_versionslen; +} ngtcp2_version_info; + +#define NGTCP2_TRANSPORT_PARAMS_VERSION_V1 1 +#define NGTCP2_TRANSPORT_PARAMS_VERSION NGTCP2_TRANSPORT_PARAMS_VERSION_V1 + +/** + * @struct + * + * :type:`ngtcp2_transport_params` represents QUIC transport + * parameters. + */ +typedef struct ngtcp2_transport_params { + /** + * :member:`preferred_address` contains preferred address if + * :member:`preferred_address_present` is nonzero. + */ + ngtcp2_preferred_addr preferred_address; + /** + * :member:`original_dcid` is the Destination Connection ID field + * from the first Initial packet from client. Server must specify + * this field. It is expected that application knows the original + * Destination Connection ID even if it sends Retry packet, for + * example, by including it in retry token. Otherwise, application + * should not specify this field. + */ + ngtcp2_cid original_dcid; + /** + * :member:`initial_scid` is the Source Connection ID field from the + * first Initial packet the endpoint sends. Application should not + * specify this field. + */ + ngtcp2_cid initial_scid; + /** + * :member:`retry_scid` is the Source Connection ID field from Retry + * packet. Only server uses this field. If server application + * received Initial packet with retry token from client and server + * verified its token, server application must set Destination + * Connection ID field from the Initial packet to this field and set + * :member:`retry_scid_present` to nonzero. Server application must + * verify that the Destination Connection ID from Initial packet was + * sent in Retry packet by, for example, including the Connection ID + * in a token, or including it in AAD when encrypting a token. + */ + ngtcp2_cid retry_scid; + /** + * :member:`initial_max_stream_data_bidi_local` is the size of flow + * control window of locally initiated stream. This is the number + * of bytes that the remote endpoint can send and the local endpoint + * must ensure that it has enough buffer to receive them. + */ + uint64_t initial_max_stream_data_bidi_local; + /** + * :member:`initial_max_stream_data_bidi_remote` is the size of flow + * control window of remotely initiated stream. This is the number + * of bytes that the remote endpoint can send and the local endpoint + * must ensure that it has enough buffer to receive them. + */ + uint64_t initial_max_stream_data_bidi_remote; + /** + * :member:`initial_max_stream_data_uni` is the size of flow control + * window of remotely initiated unidirectional stream. This is the + * number of bytes that the remote endpoint can send and the local + * endpoint must ensure that it has enough buffer to receive them. + */ + uint64_t initial_max_stream_data_uni; + /** + * :member:`initial_max_data` is the connection level flow control + * window. + */ + uint64_t initial_max_data; + /** + * :member:`initial_max_streams_bidi` is the number of concurrent + * streams that the remote endpoint can create. + */ + uint64_t initial_max_streams_bidi; + /** + * :member:`initial_max_streams_uni` is the number of concurrent + * unidirectional streams that the remote endpoint can create. + */ + uint64_t initial_max_streams_uni; + /** + * :member:`max_idle_timeout` is a duration during which sender + * allows quiescent. + */ + ngtcp2_duration max_idle_timeout; + /** + * :member:`max_udp_payload_size` is the maximum datagram size that + * the endpoint can receive. + */ + uint64_t max_udp_payload_size; + /** + * :member:`active_connection_id_limit` is the maximum number of + * Connection ID that sender can store. + */ + uint64_t active_connection_id_limit; + /** + * :member:`ack_delay_exponent` is the exponent used in ACK Delay + * field in ACK frame. + */ + uint64_t ack_delay_exponent; + /** + * :member:`max_ack_delay` is the maximum acknowledgement delay by + * which the endpoint will delay sending acknowledgements. + */ + ngtcp2_duration max_ack_delay; + /** + * :member:`max_datagram_frame_size` is the maximum size of DATAGRAM + * frame that this endpoint willingly receives. Specifying 0 + * disables DATAGRAM support. See + * https://datatracker.ietf.org/doc/html/rfc9221 + */ + uint64_t max_datagram_frame_size; + /** + * :member:`stateless_reset_token_present` is nonzero if + * :member:`stateless_reset_token` field is set. + */ + uint8_t stateless_reset_token_present; + /** + * :member:`disable_active_migration` is nonzero if the endpoint + * does not support active connection migration. + */ + uint8_t disable_active_migration; + /** + * :member:`retry_scid_present` is nonzero if :member:`retry_scid` + * field is set. + */ + uint8_t retry_scid_present; + /** + * :member:`preferred_address_present` is nonzero if + * :member:`preferred_address` is set. + */ + uint8_t preferred_address_present; + /** + * :member:`stateless_reset_token` contains stateless reset token. + */ + uint8_t stateless_reset_token[NGTCP2_STATELESS_RESET_TOKENLEN]; + /** + * :member:`grease_quic_bit` is nonzero if sender supports "Greasing + * the QUIC Bit" extension. See :rfc:`9287`. Note that the local + * endpoint always enables greasing QUIC bit regardless of this + * field value. + */ + uint8_t grease_quic_bit; + /** + * :member:`version_info` contains version_information field if + * :member:`version_info_present` is nonzero. Application should + * not specify this field. + */ + ngtcp2_version_info version_info; + /** + * :member:`version_info_present` is nonzero if + * :member:`version_info` is set. Application should not specify + * this field. + */ + uint8_t version_info_present; +} ngtcp2_transport_params; + +/** + * @enum + * + * :type:`ngtcp2_pktns_id` defines packet number space identifier. + */ +typedef enum ngtcp2_pktns_id { + /** + * :enum:`NGTCP2_PKTNS_ID_INITIAL` is the Initial packet number + * space. + */ + NGTCP2_PKTNS_ID_INITIAL, + /** + * :enum:`NGTCP2_PKTNS_ID_HANDSHAKE` is the Handshake packet number + * space. + */ + NGTCP2_PKTNS_ID_HANDSHAKE, + /** + * :enum:`NGTCP2_PKTNS_ID_APPLICATION` is the Application data + * packet number space. + */ + NGTCP2_PKTNS_ID_APPLICATION, + /** + * :enum:`NGTCP2_PKTNS_ID_MAX` is defined to get the number of + * packet number spaces. + */ + NGTCP2_PKTNS_ID_MAX +} ngtcp2_pktns_id; + +#define NGTCP2_CONN_STAT_VERSION_V1 1 +#define NGTCP2_CONN_STAT_VERSION NGTCP2_CONN_STAT_VERSION_V1 + +/** + * @struct + * + * :type:`ngtcp2_conn_stat` holds various connection statistics, and + * computed data for recovery and congestion controller. + */ +typedef struct ngtcp2_conn_stat { + /** + * :member:`latest_rtt` is the latest RTT sample which is not + * adjusted by acknowledgement delay. + */ + ngtcp2_duration latest_rtt; + /** + * :member:`min_rtt` is the minimum RTT seen so far. It is not + * adjusted by acknowledgement delay. + */ + ngtcp2_duration min_rtt; + /** + * :member:`smoothed_rtt` is the smoothed RTT. + */ + ngtcp2_duration smoothed_rtt; + /** + * :member:`rttvar` is a mean deviation of observed RTT. + */ + ngtcp2_duration rttvar; + /** + * :member:`initial_rtt` is the initial RTT which is used when no + * RTT sample is available. + */ + ngtcp2_duration initial_rtt; + /** + * :member:`first_rtt_sample_ts` is the timestamp when the first RTT + * sample is obtained. + */ + ngtcp2_tstamp first_rtt_sample_ts; + /** + * :member:`pto_count` is the count of successive PTO timer + * expiration. + */ + size_t pto_count; + /** + * :member:`loss_detection_timer` is the deadline of the current + * loss detection timer. + */ + ngtcp2_tstamp loss_detection_timer; + /** + * :member:`last_tx_pkt_ts` corresponds to + * time_of_last_ack_eliciting_packet in :rfc:`9002`. + */ + ngtcp2_tstamp last_tx_pkt_ts[NGTCP2_PKTNS_ID_MAX]; + /** + * :member:`loss_time` corresponds to loss_time in :rfc:`9002`. + */ + ngtcp2_tstamp loss_time[NGTCP2_PKTNS_ID_MAX]; + /** + * :member:`cwnd` is the size of congestion window. + */ + uint64_t cwnd; + /** + * :member:`ssthresh` is slow start threshold. + */ + uint64_t ssthresh; + /** + * :member:`congestion_recovery_start_ts` is the timestamp when + * congestion recovery started. + */ + ngtcp2_tstamp congestion_recovery_start_ts; + /** + * :member:`bytes_in_flight` is the number in bytes of all sent + * packets which have not been acknowledged. + */ + uint64_t bytes_in_flight; + /** + * :member:`max_tx_udp_payload_size` is the maximum size of UDP + * datagram payload that this endpoint transmits. It is used by + * congestion controller to compute congestion window. + */ + size_t max_tx_udp_payload_size; + /** + * :member:`delivery_rate_sec` is the current sending rate measured + * in byte per second. + */ + uint64_t delivery_rate_sec; + /** + * :member:`pacing_rate` is the current packet sending rate computed + * by a congestion controller. 0 if a congestion controller does + * not set pacing rate. Even if this value is set to 0, the library + * paces packets. + */ + double pacing_rate; + /** + * :member:`send_quantum` is the maximum size of a data aggregate + * scheduled and transmitted together. + */ + size_t send_quantum; +} ngtcp2_conn_stat; + +/** + * @enum + * + * :type:`ngtcp2_cc_algo` defines congestion control algorithms. + */ +typedef enum ngtcp2_cc_algo { + /** + * :enum:`NGTCP2_CC_ALGO_RENO` represents Reno. + */ + NGTCP2_CC_ALGO_RENO = 0x00, + /** + * :enum:`NGTCP2_CC_ALGO_CUBIC` represents Cubic. + */ + NGTCP2_CC_ALGO_CUBIC = 0x01, + /** + * :enum:`NGTCP2_CC_ALGO_BBR` represents BBR. + */ + NGTCP2_CC_ALGO_BBR = 0x02, + /** + * :enum:`NGTCP2_CC_ALGO_BBR2` represents BBR v2. + */ + NGTCP2_CC_ALGO_BBR2 = 0x03 +} ngtcp2_cc_algo; + +/** + * @functypedef + * + * :type:`ngtcp2_printf` is a callback function for logging. + * |user_data| is the same object passed to `ngtcp2_conn_client_new` + * or `ngtcp2_conn_server_new`. + */ +typedef void (*ngtcp2_printf)(void *user_data, const char *format, ...); + +/** + * @macrosection + * + * QLog related macros + */ + +/** + * @macro + * + * :macro:`NGTCP2_QLOG_WRITE_FLAG_NONE` indicates no flag set. + */ +#define NGTCP2_QLOG_WRITE_FLAG_NONE 0x00u +/** + * @macro + * + * :macro:`NGTCP2_QLOG_WRITE_FLAG_FIN` indicates that this is the + * final call to :type:`ngtcp2_qlog_write` in the current connection. + */ +#define NGTCP2_QLOG_WRITE_FLAG_FIN 0x01u + +/** + * @struct + * + * :type:`ngtcp2_rand_ctx` is a wrapper around native random number + * generator. It is opaque to the ngtcp2 library. This might be + * useful if application needs to specify random number generator per + * thread or per connection. + */ +typedef struct ngtcp2_rand_ctx { + /** + * :member:`native_handle` is a pointer to an underlying random + * number generator. + */ + void *native_handle; +} ngtcp2_rand_ctx; + +/** + * @functypedef + * + * :type:`ngtcp2_qlog_write` is a callback function which is called to + * write qlog |data| of length |datalen| bytes. |flags| is bitwise OR + * of zero or more of :macro:`NGTCP2_QLOG_WRITE_FLAG_* + * <NGTCP2_QLOG_WRITE_FLAG_NONE>`. If + * :macro:`NGTCP2_QLOG_WRITE_FLAG_FIN` is set, |datalen| may be 0. + */ +typedef void (*ngtcp2_qlog_write)(void *user_data, uint32_t flags, + const void *data, size_t datalen); + +/** + * @struct + * + * :type:`ngtcp2_qlog_settings` is a set of settings for qlog. + */ +typedef struct ngtcp2_qlog_settings { + /** + * :member:`odcid` is Original Destination Connection ID sent by + * client. It is used as group_id and ODCID fields. Client ignores + * this field and uses dcid parameter passed to + * `ngtcp2_conn_client_new()`. + */ + ngtcp2_cid odcid; + /** + * :member:`write` is a callback function to write qlog. Setting + * ``NULL`` disables qlog. + */ + ngtcp2_qlog_write write; +} ngtcp2_qlog_settings; + +#define NGTCP2_SETTINGS_VERSION_V1 1 +#define NGTCP2_SETTINGS_VERSION NGTCP2_SETTINGS_VERSION_V1 + +/** + * @struct + * + * :type:`ngtcp2_settings` defines QUIC connection settings. + */ +typedef struct ngtcp2_settings { + /** + * :member:`qlog` is qlog settings. + */ + ngtcp2_qlog_settings qlog; + /** + * :member:`cc_algo` specifies congestion control algorithm. + */ + ngtcp2_cc_algo cc_algo; + /** + * :member:`initial_ts` is an initial timestamp given to the + * library. + */ + ngtcp2_tstamp initial_ts; + /** + * :member:`initial_rtt` is an initial RTT. + */ + ngtcp2_duration initial_rtt; + /** + * :member:`log_printf` is a function that the library uses to write + * logs. ``NULL`` means no logging output. It is nothing to do + * with qlog. + */ + ngtcp2_printf log_printf; + /** + * :member:`max_tx_udp_payload_size` is the maximum size of UDP + * datagram payload that this endpoint transmits. It is used by + * congestion controller to compute congestion window. + */ + size_t max_tx_udp_payload_size; + /** + * :member:`token` is a token from Retry packet or NEW_TOKEN frame. + * + * Server sets this field if it received the token in Client Initial + * packet and successfully validated. + * + * Client sets this field if it intends to send token in its Initial + * packet. + * + * `ngtcp2_conn_server_new` and `ngtcp2_conn_client_new` make a copy + * of token. + */ + const uint8_t *token; + /** + * :member:`tokenlen` is the length of :member:`token`. + */ + size_t tokenlen; + /** + * :member:`rand_ctx` is an optional random number generator to be + * passed to :type:`ngtcp2_rand` callback. + */ + ngtcp2_rand_ctx rand_ctx; + /** + * :member:`max_window` is the maximum connection-level flow control + * window if connection-level window auto-tuning is enabled. The + * connection-level window auto tuning is enabled if nonzero value + * is specified in this field. The initial value of window size is + * :member:`ngtcp2_transport_params.initial_max_data`. The window + * size is scaled up to the value specified in this field. + */ + uint64_t max_window; + /** + * :member:`max_stream_window` is the maximum stream-level flow + * control window if stream-level window auto-tuning is enabled. + * The stream-level window auto-tuning is enabled if nonzero value + * is specified in this field. The initial value of window size is + * :member:`ngtcp2_transport_params.initial_max_stream_data_bidi_remote`, + * :member:`ngtcp2_transport_params.initial_max_stream_data_bidi_local`, + * or :member:`ngtcp2_transport_params.initial_max_stream_data_uni`, + * depending on the type of stream. The window size is scaled up to + * the value specified in this field. + */ + uint64_t max_stream_window; + /** + * :member:`ack_thresh` is the minimum number of the received ACK + * eliciting packets that triggers the immediate acknowledgement. + */ + size_t ack_thresh; + /** + * :member:`no_tx_udp_payload_size_shaping`, if set to nonzero, + * instructs the library not to limit the UDP payload size to + * :macro:`NGTCP2_MAX_UDP_PAYLOAD_SIZE` (which can be extended by + * Path MTU Discovery) and instead use the mininum size among the + * given buffer size, :member:`max_tx_udp_payload_size`, and the + * received max_udp_payload QUIC transport parameter. + */ + int no_tx_udp_payload_size_shaping; + /** + * :member:`handshake_timeout` is the period of time before giving + * up QUIC connection establishment. If QUIC handshake is not + * complete within this period, `ngtcp2_conn_handle_expiry` returns + * :macro:`NGTCP2_ERR_HANDSHAKE_TIMEOUT` error. The deadline is + * :member:`initial_ts` + :member:`handshake_timeout`. If this + * field is set to ``UINT64_MAX``, no handshake timeout is set. + */ + ngtcp2_duration handshake_timeout; + /** + * :member:`preferred_versions` is the array of versions that are + * preferred by the local endpoint. All versions set in this array + * must be supported by the library, and compatible to QUIC v1. The + * reserved versions are not allowed. They are sorted in the order + * of preference. + * + * On compatible version negotiation, server will negotiate one of + * those versions contained in this array if there is some overlap + * between these versions and the versions offered by the client. + * If there is no overlap, but the client chosen version is + * supported by the library, the server chooses the client chosen + * version as the negotiated version. This version set corresponds + * to Offered Versions in QUIC Version Negotiation draft, and it + * should be included in Version Negotiation packet. + * + * Client uses this field and :member:`original_version` to prevent + * version downgrade attack if it reacted upon Version Negotiation + * packet. If this field is specified, client must include + * |client_chosen_version| passed to `ngtcp2_conn_client_new` unless + * |client_chosen_version| is a reserved version. + */ + const uint32_t *preferred_versions; + /** + * :member:`preferred_versionslen` is the number of versions that + * are contained in the array pointed by + * :member:`preferred_versions`. + */ + size_t preferred_versionslen; + /** + * :member:`available_versions` is the array of versions that are + * going to be set in :member:`available_versions + * <ngtcp2_version_info.available_versions>` field of outgoing + * version_information QUIC transport parameter. + * + * For server, this corresponds to Fully-Deployed Versions in QUIC + * Version Negotiation draft. If this field is set not, it is set + * to :member:`preferred_versions` internally if + * :member:`preferred_versionslen` is not zero. If this field is + * not set, and :member:`preferred_versionslen` is zero, this field + * is set to :macro:`NGTCP2_PROTO_VER_V1` internally. + * + * Client must include |client_chosen_version| passed to + * `ngtcp2_conn_client_new` in this array if this field is set and + * |client_chosen_version| is not a reserved version. If this field + * is not set, |client_chosen_version| passed to + * `ngtcp2_conn_client_new` will be set in this field internally + * unless |client_chosen_version| is a reserved version. + */ + const uint32_t *available_versions; + /** + * :member:`available_versionslen` is the number of versions that + * are contained in the array pointed by + * :member:`available_versions`. + */ + size_t available_versionslen; + /** + * :member:`original_version` is the original version that client + * initially used to make a connection attempt. If it is set, and + * it differs from |client_chosen_version| passed to + * `ngtcp2_conn_client_new`, the library assumes that client reacted + * upon Version Negotiation packet. Server does not use this field. + */ + uint32_t original_version; + /** + * :member:`no_pmtud`, if set to nonzero, disables Path MTU + * Discovery. + */ + int no_pmtud; +} ngtcp2_settings; + +/** + * @struct + * + * :type:`ngtcp2_addr` is the endpoint address. + */ +typedef struct ngtcp2_addr { + /** + * :member:`addr` points to the buffer which contains endpoint + * address. It must not be ``NULL``. + */ + ngtcp2_sockaddr *addr; + /** + * :member:`addrlen` is the length of addr. + */ + ngtcp2_socklen addrlen; +} ngtcp2_addr; + +/** + * @struct + * + * :type:`ngtcp2_path` is the network endpoints where a packet is sent + * and received. + */ +typedef struct ngtcp2_path { + /** + * :member:`local` is the address of local endpoint. + */ + ngtcp2_addr local; + /** + * :member:`remote` is the address of remote endpoint. + */ + ngtcp2_addr remote; + /** + * :member:`user_data` is an arbitrary data and opaque to the + * library. + * + * Note that :type:`ngtcp2_path` is generally passed to + * :type:`ngtcp2_conn` by an application, and :type:`ngtcp2_conn` + * stores their copies. Unfortunately, there is no way for the + * application to know when :type:`ngtcp2_conn` finishes using a + * specific :type:`ngtcp2_path` object in mid connection, which + * means that the application cannot free the data pointed by this + * field. Therefore, it is advised to use this field only when the + * data pointed by this field persists in an entire lifetime of the + * connection. + */ + void *user_data; +} ngtcp2_path; + +/** + * @struct + * + * :type:`ngtcp2_path_storage` is a convenient struct to have buffers + * to store the longest addresses. + */ +typedef struct ngtcp2_path_storage { + /** + * :member:`path` stores network path. + */ + ngtcp2_path path; + /** + * :member:`local_addrbuf` is a buffer to store local address. + */ + ngtcp2_sockaddr_union local_addrbuf; + /** + * :member:`remote_addrbuf` is a buffer to store remote address. + */ + ngtcp2_sockaddr_union remote_addrbuf; +} ngtcp2_path_storage; + +/** + * @struct + * + * :type:`ngtcp2_crypto_md` is a wrapper around native message digest + * object. + */ +typedef struct ngtcp2_crypto_md { + /** + * :member:`native_handle` is a pointer to an underlying message + * digest object. + */ + void *native_handle; +} ngtcp2_crypto_md; + +/** + * @struct + * + * :type:`ngtcp2_crypto_aead` is a wrapper around native AEAD object. + */ +typedef struct ngtcp2_crypto_aead { + /** + * :member:`native_handle` is a pointer to an underlying AEAD + * object. + */ + void *native_handle; + /** + * :member:`max_overhead` is the number of additional bytes which + * AEAD encryption needs on encryption. + */ + size_t max_overhead; +} ngtcp2_crypto_aead; + +/** + * @struct + * + * :type:`ngtcp2_crypto_cipher` is a wrapper around native cipher + * object. + */ +typedef struct ngtcp2_crypto_cipher { + /** + * :member:`native_handle` is a pointer to an underlying cipher + * object. + */ + void *native_handle; +} ngtcp2_crypto_cipher; + +/** + * @struct + * + * :type:`ngtcp2_crypto_aead_ctx` is a wrapper around native AEAD + * cipher context object. It should be initialized with a specific + * key. ngtcp2 library reuses this context object to encrypt or + * decrypt multiple packets. + */ +typedef struct ngtcp2_crypto_aead_ctx { + /** + * :member:`native_handle` is a pointer to an underlying AEAD + * context object. + */ + void *native_handle; +} ngtcp2_crypto_aead_ctx; + +/** + * @struct + * + * :type:`ngtcp2_crypto_cipher_ctx` is a wrapper around native cipher + * context object. It should be initialized with a specific key. + * ngtcp2 library reuses this context object to encrypt or decrypt + * multiple packet headers. + */ +typedef struct ngtcp2_crypto_cipher_ctx { + /** + * :member:`native_handle` is a pointer to an underlying cipher + * context object. + */ + void *native_handle; +} ngtcp2_crypto_cipher_ctx; + +/** + * @struct + * + * :type:`ngtcp2_crypto_ctx` is a convenient structure to bind all + * crypto related objects in one place. Use + * `ngtcp2_crypto_ctx_initial` to initialize this struct for Initial + * packet encryption. For Handshake and 1RTT packets, use + * `ngtcp2_crypto_ctx_tls`. + */ +typedef struct ngtcp2_crypto_ctx { + /** + * :member:`aead` is AEAD object. + */ + ngtcp2_crypto_aead aead; + /** + * :member:`md` is message digest object. + */ + ngtcp2_crypto_md md; + /** + * :member:`hp` is header protection cipher. + */ + ngtcp2_crypto_cipher hp; + /** + * :member:`max_encryption` is the number of encryption which this + * key can be used with. + */ + uint64_t max_encryption; + /** + * :member:`max_decryption_failure` is the number of decryption + * failure with this key. + */ + uint64_t max_decryption_failure; +} ngtcp2_crypto_ctx; + +/** + * @function + * + * `ngtcp2_encode_transport_params` encodes |params| in |dest| of + * length |destlen|. + * + * If |dest| is NULL, and |destlen| is zero, this function just + * returns the number of bytes required to store the encoded transport + * parameters. + * + * This function returns the number of written, or one of the + * following negative error codes: + * + * :macro:`NGTCP2_ERR_NOBUF` + * Buffer is too small. + * :macro:`NGTCP2_ERR_INVALID_ARGUMENT` + * |exttype| is invalid. + */ +NGTCP2_EXTERN ngtcp2_ssize ngtcp2_encode_transport_params_versioned( + uint8_t *dest, size_t destlen, ngtcp2_transport_params_type exttype, + int transport_params_version, const ngtcp2_transport_params *params); + +/** + * @function + * + * `ngtcp2_decode_transport_params` decodes transport parameters in + * |data| of length |datalen|, and stores the result in the object + * pointed by |params|. + * + * If the optional parameters are missing, the default value is + * assigned. + * + * The following fields may point to somewhere inside the buffer + * pointed by |data| of length |datalen|: + * + * - :member:`ngtcp2_transport_params.version_info.available_versions + * <ngtcp2_version_info.available_versions>` + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * :macro:`NGTCP2_ERR_REQUIRED_TRANSPORT_PARAM` + * The required parameter is missing. + * :macro:`NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM` + * The input is malformed. + */ +NGTCP2_EXTERN int ngtcp2_decode_transport_params_versioned( + int transport_params_version, ngtcp2_transport_params *params, + ngtcp2_transport_params_type exttype, const uint8_t *data, size_t datalen); + +/** + * @function + * + * `ngtcp2_decode_transport_params_new` decodes transport parameters + * in |data| of length |datalen|, and stores the result in the object + * allocated dynamically. The pointer to the allocated object is + * assigned to |*pparams|. Unlike `ngtcp2_decode_transport_params`, + * all direct and indirect fields are also allocated dynamically if + * needed. + * + * |mem| is a memory allocator to allocate memory. If |mem| is + * ``NULL``, the memory allocator returned by `ngtcp2_mem_default()` + * is used. + * + * If the optional parameters are missing, the default value is + * assigned. + * + * `ngtcp2_transport_params_del` frees the memory allocated by this + * function. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * :macro:`NGTCP2_ERR_REQUIRED_TRANSPORT_PARAM` + * The required parameter is missing. + * :macro:`NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM` + * The input is malformed. + * :macro:`NGTCP2_ERR_NOMEM` + * Out of memory. + */ +NGTCP2_EXTERN int ngtcp2_decode_transport_params_new( + ngtcp2_transport_params **pparams, ngtcp2_transport_params_type exttype, + const uint8_t *data, size_t datalen, const ngtcp2_mem *mem); + +/** + * @function + * + * `ngtcp2_transport_params_del` frees the |params| which must be + * dynamically allocated by `ngtcp2_decode_transport_params_new`. + * + * |mem| is a memory allocator that allocated |params|. If |mem| is + * ``NULL``, the memory allocator returned by `ngtcp2_mem_default()` + * is used. + * + * If |params| is ``NULL``, this function does nothing. + */ +NGTCP2_EXTERN void ngtcp2_transport_params_del(ngtcp2_transport_params *params, + const ngtcp2_mem *mem); + +/** + * @struct + * + * :type:`ngtcp2_version_cid` is a convenient struct to store the + * result of `ngtcp2_pkt_decode_version_cid`. + */ +typedef struct ngtcp2_version_cid { + /** + * :member:`version` stores QUIC version. + */ + uint32_t version; + /** + * :member:`dcid` points to the Destination Connection ID. + */ + const uint8_t *dcid; + /** + * :member:`dcidlen` is the length of the Destination Connection ID + * pointed by :member:`dcid`. + */ + size_t dcidlen; + /** + * :member:`scid` points to the Source Connection ID. + */ + const uint8_t *scid; + /** + * :member:`scidlen` is the length of the Source Connection ID + * pointed by :member:`scid`. + */ + size_t scidlen; +} ngtcp2_version_cid; + +/** + * @function + * + * `ngtcp2_pkt_decode_version_cid` extracts QUIC version, Destination + * Connection ID and Source Connection ID from the packet pointed by + * |data| of length |datalen|. This function can handle Connection ID + * up to 255 bytes unlike `ngtcp2_pkt_decode_hd_long` or + * `ngtcp2_pkt_decode_hd_short` which are only capable of handling + * Connection ID less than or equal to :macro:`NGTCP2_MAX_CIDLEN`. + * Longer Connection ID is only valid if the version is unsupported + * QUIC version. + * + * If the given packet is Long header packet, this function extracts + * the version from the packet and assigns it to + * :member:`dest->version <ngtcp2_version_cid.version>`. It also + * extracts the pointer to the Destination Connection ID and its + * length and assigns them to :member:`dest->dcid + * <ngtcp2_version_cid.dcid>` and :member:`dest->dcidlen + * <ngtcp2_version_cid.dcidlen>` respectively. Similarly, it extracts + * the pointer to the Source Connection ID and its length and assigns + * them to :member:`dest->scid <ngtcp2_version_cid.scid>` and + * :member:`dest->scidlen <ngtcp2_version_cid.scidlen>` respectively. + * + * If the given packet is Short header packet, :member:`dest->version + * <ngtcp2_version_cid.version>` will be 0, :member:`dest->scid + * <ngtcp2_version_cid.scid>` will be ``NULL``, and + * :member:`dest->scidlen <ngtcp2_version_cid.scidlen>` will be 0. + * Because the Short header packet does not have the length of + * Destination Connection ID, the caller has to pass the length in + * |short_dcidlen|. This function extracts the pointer to the + * Destination Connection ID and assigns it to :member:`dest->dcid + * <ngtcp2_version_cid.dcid>`. |short_dcidlen| is assigned to + * :member:`dest->dcidlen <ngtcp2_version_cid.dcidlen>`. + * + * If Version Negotiation is required, this function returns + * :macro:`NGTCP2_ERR_VERSION_NEGOTIATION`. Unlike the other error + * cases, all fields of |dest| are assigned as described above. + * + * This function returns 0 if it succeeds. Otherwise, one of the + * following negative error code: + * + * :macro:`NGTCP2_ERR_INVALID_ARGUMENT` + * The function could not decode the packet header. + * :macro:`NGTCP2_ERR_VERSION_NEGOTIATION` + * Version Negotiation packet should be sent. + */ +NGTCP2_EXTERN int ngtcp2_pkt_decode_version_cid(ngtcp2_version_cid *dest, + const uint8_t *data, + size_t datalen, + size_t short_dcidlen); + +/** + * @function + * + * `ngtcp2_pkt_decode_hd_long` decodes QUIC long packet header in + * |pkt| of length |pktlen|. This function only parses the input just + * before packet number field. + * + * This function does not verify that length field is correct. In + * other words, this function succeeds even if length > |pktlen|. + * + * This function can handle Connection ID up to + * :macro:`NGTCP2_MAX_CIDLEN`. Consider to use + * `ngtcp2_pkt_decode_version_cid` to get longer Connection ID. + * + * This function handles Version Negotiation specially. If version + * field is 0, |pkt| must contain Version Negotiation packet. Version + * Negotiation packet has random type in wire format. For + * convenience, this function sets + * :enum:`ngtcp2_pkt_type.NGTCP2_PKT_VERSION_NEGOTIATION` to + * :member:`dest->type <ngtcp2_pkt_hd.type>`, clears + * :macro:`NGTCP2_PKT_FLAG_LONG_FORM` flag from :member:`dest->flags + * <ngtcp2_pkt_hd.flags>`, and sets 0 to :member:`dest->len + * <ngtcp2_pkt_hd.len>`. Version Negotiation packet occupies a single + * packet. + * + * It stores the result in the object pointed by |dest|, and returns + * the number of bytes decoded to read the packet header if it + * succeeds, or one of the following error codes: + * + * :macro:`NGTCP2_ERR_INVALID_ARGUMENT` + * Packet is too short; or it is not a long header + */ +NGTCP2_EXTERN ngtcp2_ssize ngtcp2_pkt_decode_hd_long(ngtcp2_pkt_hd *dest, + const uint8_t *pkt, + size_t pktlen); + +/** + * @function + * + * `ngtcp2_pkt_decode_hd_short` decodes QUIC short header packet + * header in |pkt| of length |pktlen|. |dcidlen| is the length of + * DCID in packet header. Short header packet does not encode the + * length of connection ID, thus we need the input from the outside. + * This function only parses the input just before packet number + * field. This function can handle Connection ID up to + * :macro:`NGTCP2_MAX_CIDLEN`. Consider to use + * `ngtcp2_pkt_decode_version_cid` to get longer Connection ID. It + * stores the result in the object pointed by |dest|, and returns the + * number of bytes decoded to read the packet header if it succeeds, + * or one of the following error codes: + * + * :macro:`NGTCP2_ERR_INVALID_ARGUMENT` + * Packet is too short; or it is not a short header + */ +NGTCP2_EXTERN ngtcp2_ssize ngtcp2_pkt_decode_hd_short(ngtcp2_pkt_hd *dest, + const uint8_t *pkt, + size_t pktlen, + size_t dcidlen); + +/** + * @function + * + * `ngtcp2_pkt_write_stateless_reset` writes Stateless Reset packet in + * the buffer pointed by |dest| whose length is |destlen|. + * |stateless_reset_token| is a pointer to the Stateless Reset Token, + * and its length must be :macro:`NGTCP2_STATELESS_RESET_TOKENLEN` + * bytes long. |rand| specifies the random octets preceding Stateless + * Reset Token. The length of |rand| is specified by |randlen| which + * must be at least :macro:`NGTCP2_MIN_STATELESS_RESET_RANDLEN` bytes + * long. + * + * If |randlen| is too long to write them all in the buffer, |rand| is + * written to the buffer as much as possible, and is truncated. + * + * This function returns the number of bytes written to the buffer, or + * one of the following negative error codes: + * + * :macro:`NGTCP2_ERR_NOBUF` + * Buffer is too small. + * :macro:`NGTCP2_ERR_INVALID_ARGUMENT` + * |randlen| is strictly less than + * :macro:`NGTCP2_MIN_STATELESS_RESET_RANDLEN`. + */ +NGTCP2_EXTERN ngtcp2_ssize ngtcp2_pkt_write_stateless_reset( + uint8_t *dest, size_t destlen, const uint8_t *stateless_reset_token, + const uint8_t *rand, size_t randlen); + +/** + * @function + * + * `ngtcp2_pkt_write_version_negotiation` writes Version Negotiation + * packet in the buffer pointed by |dest| whose length is |destlen|. + * |unused_random| should be generated randomly. |dcid| is the + * destination connection ID which appears in a packet as a source + * connection ID sent by client which caused version negotiation. + * Similarly, |scid| is the source connection ID which appears in a + * packet as a destination connection ID sent by client. |sv| is a + * list of supported versions, and |nsv| specifies the number of + * supported versions included in |sv|. + * + * This function returns the number of bytes written to the buffer, or + * one of the following negative error codes: + * + * :macro:`NGTCP2_ERR_NOBUF` + * Buffer is too small. + */ +NGTCP2_EXTERN ngtcp2_ssize ngtcp2_pkt_write_version_negotiation( + uint8_t *dest, size_t destlen, uint8_t unused_random, const uint8_t *dcid, + size_t dcidlen, const uint8_t *scid, size_t scidlen, const uint32_t *sv, + size_t nsv); + +/** + * @struct + * + * :type:`ngtcp2_conn` represents a single QUIC connection. + */ +typedef struct ngtcp2_conn ngtcp2_conn; + +/** + * @functypedef + * + * :type:`ngtcp2_client_initial` is invoked when client application + * asks TLS stack to produce first TLS cryptographic handshake data. + * + * This implementation of this callback must get the first handshake + * data from TLS stack and pass it to ngtcp2 library using + * `ngtcp2_conn_submit_crypto_data` function. Make sure that before + * calling `ngtcp2_conn_submit_crypto_data` function, client + * application must create initial packet protection keys and IVs, and + * provide them to ngtcp2 library using + * `ngtcp2_conn_install_initial_key`. + * + * This callback function must return 0 if it succeeds, or + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` which makes the library call + * return immediately. + */ +typedef int (*ngtcp2_client_initial)(ngtcp2_conn *conn, void *user_data); + +/** + * @functypedef + * + * :type:`ngtcp2_recv_client_initial` is invoked when server receives + * Initial packet from client. An server application must implement + * this callback, and generate initial keys and IVs for both + * transmission and reception. Install them using + * `ngtcp2_conn_install_initial_key`. |dcid| is the destination + * connection ID which client generated randomly. It is used to + * derive initial packet protection keys. + * + * The callback function must return 0 if it succeeds. If an error + * occurs, return :macro:`NGTCP2_ERR_CALLBACK_FAILURE` which makes the + * library call return immediately. + */ +typedef int (*ngtcp2_recv_client_initial)(ngtcp2_conn *conn, + const ngtcp2_cid *dcid, + void *user_data); + +/** + * @enum + * + * :type:`ngtcp2_crypto_level` is encryption level. + */ +typedef enum ngtcp2_crypto_level { + /** + * :enum:`NGTCP2_CRYPTO_LEVEL_INITIAL` is Initial Keys encryption + * level. + */ + NGTCP2_CRYPTO_LEVEL_INITIAL, + /** + * :enum:`NGTCP2_CRYPTO_LEVEL_HANDSHAKE` is Handshake Keys + * encryption level. + */ + NGTCP2_CRYPTO_LEVEL_HANDSHAKE, + /** + * :enum:`NGTCP2_CRYPTO_LEVEL_APPLICATION` is Application Data + * (1-RTT) Keys encryption level. + */ + NGTCP2_CRYPTO_LEVEL_APPLICATION, + /** + * :enum:`NGTCP2_CRYPTO_LEVEL_EARLY` is Early Data (0-RTT) Keys + * encryption level. + */ + NGTCP2_CRYPTO_LEVEL_EARLY +} ngtcp2_crypto_level; + +/** + * @functypedef + * + * :type`ngtcp2_recv_crypto_data` is invoked when crypto data is + * received. The received data is pointed to by |data|, and its + * length is |datalen|. The |offset| specifies the offset where + * |data| is positioned. |user_data| is the arbitrary pointer passed + * to `ngtcp2_conn_client_new` or `ngtcp2_conn_server_new`. The + * ngtcp2 library ensures that the crypto data is passed to the + * application in the increasing order of |offset|. |datalen| is + * always strictly greater than 0. |crypto_level| indicates the + * encryption level where this data is received. Crypto data can + * never be received in + * :enum:`ngtcp2_crypto_level.NGTCP2_CRYPTO_LEVEL_EARLY`. + * + * The application should provide the given data to TLS stack. + * + * The callback function must return 0 if it succeeds, or one of the + * following negative error codes: + * + * - :macro:`NGTCP2_ERR_CRYPTO` + * - :macro:`NGTCP2_ERR_REQUIRED_TRANSPORT_PARAM` + * - :macro:`NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM` + * - :macro:`NGTCP2_ERR_TRANSPORT_PARAM` + * - :macro:`NGTCP2_ERR_PROTO` + * - :macro:`NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE` + * - :macro:`NGTCP2_ERR_NOMEM` + * - :macro:`NGTCP2_ERR_CALLBACK_FAILURE` + * + * If the other value is returned, it is treated as + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE`. + * + * If application encounters fatal error, return + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` which makes the library call + * return immediately. + */ +typedef int (*ngtcp2_recv_crypto_data)(ngtcp2_conn *conn, + ngtcp2_crypto_level crypto_level, + uint64_t offset, const uint8_t *data, + size_t datalen, void *user_data); + +/** + * @functypedef + * + * :type:`ngtcp2_handshake_completed` is invoked when QUIC + * cryptographic handshake has completed. + * + * The callback function must return 0 if it succeeds. Returning + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return + * immediately. + */ +typedef int (*ngtcp2_handshake_completed)(ngtcp2_conn *conn, void *user_data); + +/** + * @functypedef + * + * :type:`ngtcp2_handshake_confirmed` is invoked when QUIC + * cryptographic handshake is confirmed. The handshake confirmation + * means that both endpoints agree that handshake has finished. + * + * The callback function must return 0 if it succeeds. Returning + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return + * immediately. + */ +typedef int (*ngtcp2_handshake_confirmed)(ngtcp2_conn *conn, void *user_data); + +/** + * @functypedef + * + * :type:`ngtcp2_recv_version_negotiation` is invoked when Version + * Negotiation packet is received. |hd| is the pointer to the QUIC + * packet header object. The vector |sv| of |nsv| elements contains + * the QUIC version the server supports. Since Version Negotiation is + * only sent by server, this callback function is used by client only. + * + * The callback function must return 0 if it succeeds, or + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` which makes the library call + * return immediately. + */ +typedef int (*ngtcp2_recv_version_negotiation)(ngtcp2_conn *conn, + const ngtcp2_pkt_hd *hd, + const uint32_t *sv, size_t nsv, + void *user_data); + +/** + * @functypedef + * + * :type:`ngtcp2_recv_retry` is invoked when Retry packet is received. + * This callback is client use only. + * + * Application must regenerate packet protection key, IV, and header + * protection key for Initial packets using the destination connection + * ID obtained by :member:`hd->scid <ngtcp2_pkt_hd.scid>` and install + * them by calling `ngtcp2_conn_install_initial_key()`. + * + * 0-RTT data accepted by the ngtcp2 library will be automatically + * retransmitted as 0-RTT data by the library. + * + * The callback function must return 0 if it succeeds. Returning + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return + * immediately. + */ +typedef int (*ngtcp2_recv_retry)(ngtcp2_conn *conn, const ngtcp2_pkt_hd *hd, + void *user_data); + +/** + * @functypedef + * + * :type:`ngtcp2_encrypt` is invoked when the ngtcp2 library asks the + * application to encrypt packet payload. The packet payload to + * encrypt is passed as |plaintext| of length |plaintextlen|. The + * AEAD cipher is |aead|. |aead_ctx| is the AEAD cipher context + * object which is initialized with encryption key. The nonce is + * passed as |nonce| of length |noncelen|. The Additional + * Authenticated Data is passed as |aad| of length |aadlen|. + * + * The implementation of this callback must encrypt |plaintext| using + * the negotiated cipher suite and write the ciphertext into the + * buffer pointed by |dest|. |dest| has enough capacity to store the + * ciphertext and any additional AEAD tag data. + * + * |dest| and |plaintext| may point to the same buffer. + * + * The callback function must return 0 if it succeeds, or + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` which makes the library call + * return immediately. + */ +typedef int (*ngtcp2_encrypt)(uint8_t *dest, const ngtcp2_crypto_aead *aead, + const ngtcp2_crypto_aead_ctx *aead_ctx, + const uint8_t *plaintext, size_t plaintextlen, + const uint8_t *nonce, size_t noncelen, + const uint8_t *aad, size_t aadlen); + +/** + * @functypedef + * + * :type:`ngtcp2_decrypt` is invoked when the ngtcp2 library asks the + * application to decrypt packet payload. The packet payload to + * decrypt is passed as |ciphertext| of length |ciphertextlen|. The + * AEAD cipher is |aead|. |aead_ctx| is the AEAD cipher context + * object which is initialized with decryption key. The nonce is + * passed as |nonce| of length |noncelen|. The Additional + * Authenticated Data is passed as |aad| of length |aadlen|. + * + * The implementation of this callback must decrypt |ciphertext| using + * the negotiated cipher suite and write the ciphertext into the + * buffer pointed by |dest|. |dest| has enough capacity to store the + * cleartext. + * + * |dest| and |ciphertext| may point to the same buffer. + * + * The callback function must return 0 if it succeeds. If TLS stack + * fails to decrypt data, return :macro:`NGTCP2_ERR_DECRYPT`. For any + * other errors, return :macro:`NGTCP2_ERR_CALLBACK_FAILURE` which + * makes the library call return immediately. + */ +typedef int (*ngtcp2_decrypt)(uint8_t *dest, const ngtcp2_crypto_aead *aead, + const ngtcp2_crypto_aead_ctx *aead_ctx, + const uint8_t *ciphertext, size_t ciphertextlen, + const uint8_t *nonce, size_t noncelen, + const uint8_t *aad, size_t aadlen); + +/** + * @functypedef + * + * :type:`ngtcp2_hp_mask` is invoked when the ngtcp2 library asks the + * application to produce a mask to encrypt or decrypt packet header. + * The encryption cipher is |hp|. |hp_ctx| is the cipher context + * object which is initialized with header protection key. The sample + * is passed as |sample| which is :macro:`NGTCP2_HP_SAMPLELEN` bytes + * long. + * + * The implementation of this callback must produce a mask using the + * header protection cipher suite specified by QUIC specification and + * write the result into the buffer pointed by |dest|. The length of + * the mask must be at least :macro:`NGTCP2_HP_MASKLEN`. The library + * only uses the first :macro:`NGTCP2_HP_MASKLEN` bytes of the + * produced mask. The buffer pointed by |dest| is guaranteed to have + * at least :macro:`NGTCP2_HP_SAMPLELEN` bytes available for + * convenience. + * + * The callback function must return 0 if it succeeds, or + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` which makes the library call + * return immediately. + */ +typedef int (*ngtcp2_hp_mask)(uint8_t *dest, const ngtcp2_crypto_cipher *hp, + const ngtcp2_crypto_cipher_ctx *hp_ctx, + const uint8_t *sample); + +/** + * @macrosection + * + * Stream data flags + */ + +/** + * @macro + * + * :macro:`NGTCP2_STREAM_DATA_FLAG_NONE` indicates no flag set. + */ +#define NGTCP2_STREAM_DATA_FLAG_NONE 0x00u + +/** + * @macro + * + * :macro:`NGTCP2_STREAM_DATA_FLAG_FIN` indicates that this chunk of + * data is final piece of an incoming stream. + */ +#define NGTCP2_STREAM_DATA_FLAG_FIN 0x01u + +/** + * @macro + * + * :macro:`NGTCP2_STREAM_DATA_FLAG_EARLY` indicates that this chunk of + * data contains data received in 0RTT packet and the handshake has + * not completed yet, which means that the data might be replayed. + */ +#define NGTCP2_STREAM_DATA_FLAG_EARLY 0x02u + +/** + * @functypedef + * + * :type:`ngtcp2_recv_stream_data` is invoked when stream data is + * received. The stream is specified by |stream_id|. |flags| is the + * bitwise-OR of zero or more of :macro:`NGTCP2_STREAM_DATA_FLAG_* + * <NGTCP2_STREAM_DATA_FLAG_NONE>`. If |flags| & + * :macro:`NGTCP2_STREAM_DATA_FLAG_FIN` is nonzero, this portion of + * the data is the last data in this stream. |offset| is the offset + * where this data begins. The library ensures that data is passed to + * the application in the non-decreasing order of |offset| without any + * overlap. The data is passed as |data| of length |datalen|. + * |datalen| may be 0 if and only if |fin| is nonzero. + * + * If :macro:`NGTCP2_STREAM_DATA_FLAG_EARLY` is set in |flags|, it + * indicates that a part of or whole data was received in 0RTT packet + * and a handshake has not completed yet. + * + * The callback function must return 0 if it succeeds, or + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` which makes the library return + * immediately. + */ +typedef int (*ngtcp2_recv_stream_data)(ngtcp2_conn *conn, uint32_t flags, + int64_t stream_id, uint64_t offset, + const uint8_t *data, size_t datalen, + void *user_data, void *stream_user_data); + +/** + * @functypedef + * + * :type:`ngtcp2_stream_open` is a callback function which is called + * when remote stream is opened by peer. This function is not called + * if stream is opened by implicitly (we might reconsider this + * behaviour). + * + * The implementation of this callback should return 0 if it succeeds. + * Returning :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library + * call return immediately. + */ +typedef int (*ngtcp2_stream_open)(ngtcp2_conn *conn, int64_t stream_id, + void *user_data); + +/** + * @macrosection + * + * Stream close flags + */ + +/** + * @macro + * + * :macro:`NGTCP2_STREAM_CLOSE_FLAG_NONE` indicates no flag set. + */ +#define NGTCP2_STREAM_CLOSE_FLAG_NONE 0x00u + +/** + * @macro + * + * :macro:`NGTCP2_STREAM_CLOSE_FLAG_APP_ERROR_CODE_SET` indicates that + * app_error_code parameter is set. + */ +#define NGTCP2_STREAM_CLOSE_FLAG_APP_ERROR_CODE_SET 0x01u + +/** + * @functypedef + * + * :type:`ngtcp2_stream_close` is invoked when a stream is closed. + * This callback is not called when QUIC connection is closed before + * existing streams are closed. |flags| is the bitwise-OR of zero or + * more of :macro:`NGTCP2_STREAM_CLOSE_FLAG_* + * <NGTCP2_STREAM_CLOSE_FLAG_NONE>`. |app_error_code| indicates the + * error code of this closure if + * :macro:`NGTCP2_STREAM_CLOSE_FLAG_APP_ERROR_CODE_SET` is set in + * |flags|. If it is not set, the stream was closed without any error + * code, which generally means success. + * + * |app_error_code| is the first application error code sent by a + * local endpoint, or received from a remote endpoint. If a stream is + * closed cleanly, no application error code is exchanged. Since QUIC + * stack does not know the application error code which indicates "no + * errors", |app_error_code| is set to 0 and + * :macro:`NGTCP2_STREAM_CLOSE_FLAG_APP_ERROR_CODE_SET` is not set in + * |flags| in this case. + * + * The implementation of this callback should return 0 if it succeeds. + * Returning :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library + * call return immediately. + */ +typedef int (*ngtcp2_stream_close)(ngtcp2_conn *conn, uint32_t flags, + int64_t stream_id, uint64_t app_error_code, + void *user_data, void *stream_user_data); + +/** + * @functypedef + * + * :type:`ngtcp2_stream_reset` is invoked when a stream identified by + * |stream_id| is reset by a remote endpoint. + * + * The implementation of this callback should return 0 if it succeeds. + * Returning :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library + * call return immediately. + */ +typedef int (*ngtcp2_stream_reset)(ngtcp2_conn *conn, int64_t stream_id, + uint64_t final_size, uint64_t app_error_code, + void *user_data, void *stream_user_data); + +/** + * @functypedef + * + * :type:`ngtcp2_acked_stream_data_offset` is a callback function + * which is called when stream data is acked, and application can free + * the data. The acked range of data is [offset, offset + datalen). + * For a given stream_id, this callback is called sequentially in + * increasing order of |offset| without any overlap. |datalen| is + * normally strictly greater than 0. One exception is that when a + * packet which includes STREAM frame which has fin flag set, and 0 + * length data, this callback is invoked with 0 passed as |datalen|. + * + * If a stream is closed prematurely and stream data is still + * in-flight, this callback function is not called for those data. + * + * The implementation of this callback should return 0 if it succeeds. + * Returning :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library + * call return immediately. + */ +typedef int (*ngtcp2_acked_stream_data_offset)( + ngtcp2_conn *conn, int64_t stream_id, uint64_t offset, uint64_t datalen, + void *user_data, void *stream_user_data); + +/** + * @functypedef + * + * :type:`ngtcp2_recv_stateless_reset` is a callback function which is + * called when Stateless Reset packet is received. The stateless + * reset details are given in |sr|. + * + * The implementation of this callback should return 0 if it succeeds. + * Returning :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library + * call return immediately. + */ +typedef int (*ngtcp2_recv_stateless_reset)(ngtcp2_conn *conn, + const ngtcp2_pkt_stateless_reset *sr, + void *user_data); + +/** + * @functypedef + * + * :type:`ngtcp2_extend_max_streams` is a callback function which is + * called every time max stream ID is strictly extended. + * |max_streams| is the cumulative number of streams which an endpoint + * can open. + * + * The callback function must return 0 if it succeeds. Returning + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return + * immediately. + */ +typedef int (*ngtcp2_extend_max_streams)(ngtcp2_conn *conn, + uint64_t max_streams, void *user_data); + +/** + * @functypedef + * + * :type:`ngtcp2_extend_max_stream_data` is a callback function which + * is invoked when max stream data is extended. |stream_id| + * identifies the stream. |max_data| is a cumulative number of bytes + * the endpoint can send on this stream. + * + * The callback function must return 0 if it succeeds. Returning + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return + * immediately. + */ +typedef int (*ngtcp2_extend_max_stream_data)(ngtcp2_conn *conn, + int64_t stream_id, + uint64_t max_data, void *user_data, + void *stream_user_data); + +/** + * @functypedef + * + * :type:`ngtcp2_rand` is a callback function to get randomized byte + * string from application. Application must fill random |destlen| + * bytes to the buffer pointed by |dest|. The generated bytes are + * used only in non-cryptographic context. + */ +typedef void (*ngtcp2_rand)(uint8_t *dest, size_t destlen, + const ngtcp2_rand_ctx *rand_ctx); + +/** + * @functypedef + * + * :type:`ngtcp2_get_new_connection_id` is a callback function to ask + * an application for new connection ID. Application must generate + * new unused connection ID with the exact |cidlen| bytes and store it + * in |cid|. It also has to generate stateless reset token into + * |token|. The length of stateless reset token is + * :macro:`NGTCP2_STATELESS_RESET_TOKENLEN` and it is guaranteed that + * the buffer pointed by |cid| has the sufficient space to store the + * token. + * + * The callback function must return 0 if it succeeds. Returning + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return + * immediately. + */ +typedef int (*ngtcp2_get_new_connection_id)(ngtcp2_conn *conn, ngtcp2_cid *cid, + uint8_t *token, size_t cidlen, + void *user_data); + +/** + * @functypedef + * + * :type:`ngtcp2_remove_connection_id` is a callback function which + * notifies the application that connection ID |cid| is no longer used + * by remote endpoint. + * + * The callback function must return 0 if it succeeds. Returning + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return + * immediately. + */ +typedef int (*ngtcp2_remove_connection_id)(ngtcp2_conn *conn, + const ngtcp2_cid *cid, + void *user_data); + +/** + * @functypedef + * + * :type:`ngtcp2_update_key` is a callback function which tells the + * application that it must generate new packet protection keying + * materials and AEAD cipher context objects with new keys. The + * current set of secrets are given as |current_rx_secret| and + * |current_tx_secret| of length |secretlen|. They are decryption and + * encryption secrets respectively. + * + * The application has to generate new secrets and keys for both + * encryption and decryption, and write decryption secret and IV to + * the buffer pointed by |rx_secret| and |rx_iv| respectively. It + * also has to create new AEAD cipher context object with new + * decryption key and initialize |rx_aead_ctx| with it. Similarly, + * write encryption secret and IV to the buffer pointed by |tx_secret| + * and |tx_iv|. Create new AEAD cipher context object with new + * encryption key and initialize |tx_aead_ctx| with it. All given + * buffers have the enough capacity to store secret, key and IV. + * + * The callback function must return 0 if it succeeds. Returning + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return + * immediately. + */ +typedef int (*ngtcp2_update_key)( + ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret, + ngtcp2_crypto_aead_ctx *rx_aead_ctx, uint8_t *rx_iv, + ngtcp2_crypto_aead_ctx *tx_aead_ctx, uint8_t *tx_iv, + const uint8_t *current_rx_secret, const uint8_t *current_tx_secret, + size_t secretlen, void *user_data); + +/** + * @macrosection + * + * Path validation related macros + */ + +/** + * @macro + * + * :macro:`NGTCP2_PATH_VALIDATION_FLAG_NONE` indicates no flag set. + */ +#define NGTCP2_PATH_VALIDATION_FLAG_NONE 0x00u + +/** + * @macro + * + * :macro:`NGTCP2_PATH_VALIDATION_FLAG_PREFERRED_ADDR` indicates the + * validation involving server preferred address. This flag is only + * set for client. + */ +#define NGTCP2_PATH_VALIDATION_FLAG_PREFERRED_ADDR 0x01u + +/** + * @functypedef + * + * :type:`ngtcp2_path_validation` is a callback function which tells + * the application the outcome of path validation. |flags| is zero or + * more of :macro:`NGTCP2_PATH_VALIDATION_FLAG_* + * <NGTCP2_PATH_VALIDATION_FLAG_NONE>`. |path| is the path that was + * validated. If |res| is + * :enum:`ngtcp2_path_validation_result.NGTCP2_PATH_VALIDATION_RESULT_SUCCESS`, + * the path validation succeeded. If |res| is + * :enum:`ngtcp2_path_validation_result.NGTCP2_PATH_VALIDATION_RESULT_FAILURE`, + * the path validation failed. + * + * The callback function must return 0 if it succeeds. Returning + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return + * immediately. + */ +typedef int (*ngtcp2_path_validation)(ngtcp2_conn *conn, uint32_t flags, + const ngtcp2_path *path, + ngtcp2_path_validation_result res, + void *user_data); + +/** + * @functypedef + * + * :type:`ngtcp2_select_preferred_addr` is a callback function which + * asks a client application to choose server address from preferred + * addresses |paddr| received from server. An application should + * write a network path for a selected preferred address in |dest|. + * More specifically, the selected preferred address must be set to + * :member:`dest->remote <ngtcp2_path.remote>`, a client source + * address must be set to :member:`dest->local <ngtcp2_path.local>`. + * If a client source address does not change for the new server + * address, leave :member:`dest->local <ngtcp2_path.local>` + * unmodified, or copy the value of :member:`local + * <ngtcp2_path.local>` field of the current network path obtained + * from `ngtcp2_conn_get_path()`. Both :member:`dest->local.addr + * <ngtcp2_addr.addr>` and :member:`dest->remote.addr + * <ngtcp2_addr.addr>` point to buffers which are at least + * ``sizeof(struct sockaddr_storage)`` bytes long, respectively. If + * an application denies the preferred addresses, just leave |dest| + * unmodified (or set :member:`dest->remote.addrlen + * <ngtcp2_addr.addrlen>` to 0) and return 0. + * + * The callback function must return 0 if it succeeds. Returning + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return + * immediately. + */ +typedef int (*ngtcp2_select_preferred_addr)(ngtcp2_conn *conn, + ngtcp2_path *dest, + const ngtcp2_preferred_addr *paddr, + void *user_data); + +/** + * @enum + * + * :type:`ngtcp2_connection_id_status_type` defines a set of status + * for Destination Connection ID. + */ +typedef enum ngtcp2_connection_id_status_type { + /** + * :enum:`NGTCP2_CONNECTION_ID_STATUS_TYPE_ACTIVATE` indicates that + * a local endpoint starts using new destination Connection ID. + */ + NGTCP2_CONNECTION_ID_STATUS_TYPE_ACTIVATE, + /** + * :enum:`NGTCP2_CONNECTION_ID_STATUS_TYPE_DEACTIVATE` indicates + * that a local endpoint stops using a given destination Connection + * ID. + */ + NGTCP2_CONNECTION_ID_STATUS_TYPE_DEACTIVATE +} ngtcp2_connection_id_status_type; + +/** + * @functypedef + * + * :type:`ngtcp2_connection_id_status` is a callback function which is + * called when the status of Connection ID changes. + * + * |token| is the associated stateless reset token and it is ``NULL`` + * if no token is present. + * + * |type| is the one of the value defined in + * :type:`ngtcp2_connection_id_status_type`. The new value might be + * added in the future release. + * + * The callback function must return 0 if it succeeds. Returning + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return + * immediately. + */ +typedef int (*ngtcp2_connection_id_status)(ngtcp2_conn *conn, int type, + uint64_t seq, const ngtcp2_cid *cid, + const uint8_t *token, + void *user_data); + +/** + * @functypedef + * + * :type:`ngtcp2_recv_new_token` is a callback function which is + * called when new token is received from server. + * + * |token| is the received token of length |tokenlen| bytes long. + * + * The callback function must return 0 if it succeeds. Returning + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return + * immediately. + */ +typedef int (*ngtcp2_recv_new_token)(ngtcp2_conn *conn, const uint8_t *token, + size_t tokenlen, void *user_data); + +/** + * @functypedef + * + * :type:`ngtcp2_delete_crypto_aead_ctx` is a callback function which + * must delete the native object pointed by + * :member:`aead_ctx->native_handle + * <ngtcp2_crypto_aead_ctx.native_handle>`. + */ +typedef void (*ngtcp2_delete_crypto_aead_ctx)(ngtcp2_conn *conn, + ngtcp2_crypto_aead_ctx *aead_ctx, + void *user_data); + +/** + * @functypedef + * + * :type:`ngtcp2_delete_crypto_cipher_ctx` is a callback function + * which must delete the native object pointed by + * :member:`cipher_ctx->native_handle + * <ngtcp2_crypto_cipher_ctx.native_handle>`. + */ +typedef void (*ngtcp2_delete_crypto_cipher_ctx)( + ngtcp2_conn *conn, ngtcp2_crypto_cipher_ctx *cipher_ctx, void *user_data); + +/** + * @macrosection + * + * Datagram flags + */ + +/** + * @macro + * + * :macro:`NGTCP2_DATAGRAM_FLAG_NONE` indicates no flag set. + */ +#define NGTCP2_DATAGRAM_FLAG_NONE 0x00u + +/** + * @macro + * + * :macro:`NGTCP2_DATAGRAM_FLAG_EARLY` indicates that DATAGRAM frame + * is received in 0RTT packet and the handshake has not completed yet, + * which means that the data might be replayed. + */ +#define NGTCP2_DATAGRAM_FLAG_EARLY 0x01u + +/** + * @functypedef + * + * :type:`ngtcp2_recv_datagram` is invoked when DATAGRAM frame is + * received. |flags| is bitwise-OR of zero or more of + * :macro:`NGTCP2_DATAGRAM_FLAG_* <NGTCP2_DATAGRAM_FLAG_NONE>`. + * + * If :macro:`NGTCP2_DATAGRAM_FLAG_EARLY` is set in |flags|, it + * indicates that DATAGRAM frame was received in 0RTT packet and a + * handshake has not completed yet. + * + * The callback function must return 0 if it succeeds, or + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` which makes the library return + * immediately. + */ +typedef int (*ngtcp2_recv_datagram)(ngtcp2_conn *conn, uint32_t flags, + const uint8_t *data, size_t datalen, + void *user_data); + +/** + * @functypedef + * + * :type:`ngtcp2_ack_datagram` is invoked when a packet which contains + * DATAGRAM frame which is identified by |dgram_id| is acknowledged. + * |dgram_id| is the valued passed to `ngtcp2_conn_writev_datagram`. + * + * The callback function must return 0 if it succeeds, or + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` which makes the library return + * immediately. + */ +typedef int (*ngtcp2_ack_datagram)(ngtcp2_conn *conn, uint64_t dgram_id, + void *user_data); + +/** + * @functypedef + * + * :type:`ngtcp2_lost_datagram` is invoked when a packet which + * contains DATAGRAM frame which is identified by |dgram_id| is + * declared lost. |dgram_id| is the valued passed to + * `ngtcp2_conn_writev_datagram`. Note that the loss might be + * spurious, and DATAGRAM frame might be acknowledged later. + * + * The callback function must return 0 if it succeeds, or + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` which makes the library return + * immediately. + */ +typedef int (*ngtcp2_lost_datagram)(ngtcp2_conn *conn, uint64_t dgram_id, + void *user_data); + +/** + * @functypedef + * + * :type:`ngtcp2_get_path_challenge_data` is a callback function to + * ask an application for new data that is sent in PATH_CHALLENGE + * frame. Application must generate new unpredictable exactly + * :macro:`NGTCP2_PATH_CHALLENGE_DATALEN` bytes of random data and + * store them into the buffer pointed by |data|. + * + * The callback function must return 0 if it succeeds. Returning + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return + * immediately. + */ +typedef int (*ngtcp2_get_path_challenge_data)(ngtcp2_conn *conn, uint8_t *data, + void *user_data); + +/** + * @functypedef + * + * :type:`ngtcp2_stream_stop_sending` is invoked when a stream is no + * longer read by a local endpoint before it receives all stream data. + * This function is called at most once per stream. |app_error_code| + * is the error code passed to `ngtcp2_conn_shutdown_stream_read` or + * `ngtcp2_conn_shutdown_stream`. + * + * The callback function must return 0 if it succeeds. Returning + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return + * immediately. + */ +typedef int (*ngtcp2_stream_stop_sending)(ngtcp2_conn *conn, int64_t stream_id, + uint64_t app_error_code, + void *user_data, + void *stream_user_data); + +/** + * @functypedef + * + * :type:`ngtcp2_version_negotiation` is invoked when the compatible + * version negotiation takes place. For client, it is called when it + * sees a change in version field of a long header packet. This + * callback function might be called multiple times for client. For + * server, it is called once when the version is negotiated. + * + * The implementation of this callback must install new Initial keys + * for |version|. Use `ngtcp2_conn_install_vneg_initial_key` to + * install keys. + * + * The callback function must return 0 if it succeeds. Returning + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return + * immediately. + */ +typedef int (*ngtcp2_version_negotiation)(ngtcp2_conn *conn, uint32_t version, + const ngtcp2_cid *client_dcid, + void *user_data); + +/** + * @functypedef + * + * :type:`ngtcp2_recv_key` is invoked when new key is installed to + * |conn| during QUIC cryptographic handshake. + * + * The callback function must return 0 if it succeeds. Returning + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return + * immediately. + */ +typedef int (*ngtcp2_recv_key)(ngtcp2_conn *conn, ngtcp2_crypto_level level, + void *user_data); + +/** + * @functypedef + * + * :type:`ngtcp2_early_data_rejected` is invoked when early data was + * rejected by server, or client decided not to attempt early data. + * + * The callback function must return 0 if it succeeds. Returning + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return + * immediately. + */ +typedef int (*ngtcp2_early_data_rejected)(ngtcp2_conn *conn, void *user_data); + +#define NGTCP2_CALLBACKS_VERSION_V1 1 +#define NGTCP2_CALLBACKS_VERSION NGTCP2_CALLBACKS_VERSION_V1 + +/** + * @struct + * + * :type:`ngtcp2_callbacks` holds a set of callback functions. + */ +typedef struct ngtcp2_callbacks { + /** + * :member:`client_initial` is a callback function which is invoked + * when client asks TLS stack to produce first TLS cryptographic + * handshake message. This callback function must be specified for + * a client application. + */ + ngtcp2_client_initial client_initial; + /** + * :member:`recv_client_initial` is a callback function which is + * invoked when a server receives the first packet from client. + * This callback function must be specified for a server application. + */ + ngtcp2_recv_client_initial recv_client_initial; + /** + * :member:`recv_crypto_data` is a callback function which is + * invoked when cryptographic data (CRYPTO frame, in other words, + * TLS message) is received. This callback function must be + * specified. + */ + ngtcp2_recv_crypto_data recv_crypto_data; + /** + * :member:`handshake_completed` is a callback function which is + * invoked when QUIC cryptographic handshake has completed. This + * callback function is optional. + */ + ngtcp2_handshake_completed handshake_completed; + /** + * :member:`recv_version_negotiation` is a callback function which + * is invoked when Version Negotiation packet is received by a + * client. This callback function is optional. + */ + ngtcp2_recv_version_negotiation recv_version_negotiation; + /** + * :member:`encrypt` is a callback function which is invoked to + * encrypt a QUIC packet. This callback function must be specified. + */ + ngtcp2_encrypt encrypt; + /** + * :member:`decrypt` is a callback function which is invoked to + * decrypt a QUIC packet. This callback function must be specified. + */ + ngtcp2_decrypt decrypt; + /** + * :member:`hp_mask` is a callback function which is invoked to get + * a mask to encrypt or decrypt packet header. This callback + * function must be specified. + */ + ngtcp2_hp_mask hp_mask; + /** + * :member:`recv_stream_data` is a callback function which is + * invoked when STREAM data, which includes application data, is + * received. This callback function is optional. + */ + ngtcp2_recv_stream_data recv_stream_data; + /** + * :member:`acked_stream_data_offset` is a callback function which + * is invoked when STREAM data, which includes application data, is + * acknowledged by a remote endpoint. It tells an application the + * largest offset of acknowledged STREAM data without a gap so that + * application can free memory for the data. This callback function + * is optional. + */ + ngtcp2_acked_stream_data_offset acked_stream_data_offset; + /** + * :member:`stream_open` is a callback function which is invoked + * when new remote stream is opened by a remote endpoint. This + * callback function is optional. + */ + ngtcp2_stream_open stream_open; + /** + * :member:`stream_close` is a callback function which is invoked + * when a stream is closed. This callback function is optional. + */ + ngtcp2_stream_close stream_close; + /** + * :member:`recv_stateless_reset` is a callback function which is + * invoked when Stateless Reset packet is received. This callback + * function is optional. + */ + ngtcp2_recv_stateless_reset recv_stateless_reset; + /** + * :member:`recv_retry` is a callback function which is invoked when + * a client receives Retry packet. For client, this callback + * function must be specified. Server never receive Retry packet. + */ + ngtcp2_recv_retry recv_retry; + /** + * :member:`extend_max_local_streams_bidi` is a callback function + * which is invoked when the number of bidirectional stream which a + * local endpoint can open is increased. This callback function is + * optional. + */ + ngtcp2_extend_max_streams extend_max_local_streams_bidi; + /** + * :member:`extend_max_local_streams_uni` is a callback function + * which is invoked when the number of unidirectional stream which a + * local endpoint can open is increased. This callback function is + * optional. + */ + ngtcp2_extend_max_streams extend_max_local_streams_uni; + /** + * :member:`rand` is a callback function which is invoked when the + * library needs sequence of random data. This callback function + * must be specified. + */ + ngtcp2_rand rand; + /** + * :member:`get_new_connection_id` is a callback function which is + * invoked when the library needs new connection ID. This callback + * function must be specified. + */ + ngtcp2_get_new_connection_id get_new_connection_id; + /** + * :member:`remove_connection_id` is a callback function which + * notifies an application that connection ID is no longer used by a + * remote endpoint. This callback function is optional. + */ + ngtcp2_remove_connection_id remove_connection_id; + /** + * :member:`update_key` is a callback function which is invoked when + * the library tells an application that it must update keying + * materials and install new keys. This callback function must be + * specified. + */ + ngtcp2_update_key update_key; + /** + * :member:`path_validation` is a callback function which is invoked + * when path validation completed. This callback function is + * optional. + */ + ngtcp2_path_validation path_validation; + /** + * :member:`select_preferred_addr` is a callback function which is + * invoked when the library asks a client to select preferred + * address presented by a server. This callback function is + * optional. + */ + ngtcp2_select_preferred_addr select_preferred_addr; + /** + * :member:`stream_reset` is a callback function which is invoked + * when a stream is reset by a remote endpoint. This callback + * function is optional. + */ + ngtcp2_stream_reset stream_reset; + /** + * :member:`extend_max_remote_streams_bidi` is a callback function + * which is invoked when the number of bidirectional streams which a + * remote endpoint can open is increased. This callback function is + * optional. + */ + ngtcp2_extend_max_streams extend_max_remote_streams_bidi; + /** + * :member:`extend_max_remote_streams_uni` is a callback function + * which is invoked when the number of unidirectional streams which + * a remote endpoint can open is increased. This callback function + * is optional. + */ + ngtcp2_extend_max_streams extend_max_remote_streams_uni; + /** + * :member:`extend_max_stream_data` is callback function which is + * invoked when the maximum offset of STREAM data that a local + * endpoint can send is increased. This callback function is + * optional. + */ + ngtcp2_extend_max_stream_data extend_max_stream_data; + /** + * :member:`dcid_status` is a callback function which is invoked + * when the new destination Connection ID is activated or the + * activated destination Connection ID is now deactivated. This + * callback function is optional. + */ + ngtcp2_connection_id_status dcid_status; + /** + * :member:`handshake_confirmed` is a callback function which is + * invoked when both endpoints agree that handshake has finished. + * This field is ignored by server because handshake_completed + * indicates the handshake confirmation for server. This callback + * function is optional. + */ + ngtcp2_handshake_confirmed handshake_confirmed; + /** + * :member:`recv_new_token` is a callback function which is invoked + * when new token is received from server. This field is ignored by + * server. This callback function is optional. + */ + ngtcp2_recv_new_token recv_new_token; + /** + * :member:`delete_crypto_aead_ctx` is a callback function which + * deletes a given AEAD cipher context object. This callback + * function must be specified. + */ + ngtcp2_delete_crypto_aead_ctx delete_crypto_aead_ctx; + /** + * :member:`delete_crypto_cipher_ctx` is a callback function which + * deletes a given cipher context object. This callback function + * must be specified. + */ + ngtcp2_delete_crypto_cipher_ctx delete_crypto_cipher_ctx; + /** + * :member:`recv_datagram` is a callback function which is invoked + * when DATAGRAM frame is received. This callback function is + * optional. + */ + ngtcp2_recv_datagram recv_datagram; + /** + * :member:`ack_datagram` is a callback function which is invoked + * when a packet containing DATAGRAM frame is acknowledged. This + * callback function is optional. + */ + ngtcp2_ack_datagram ack_datagram; + /** + * :member:`lost_datagram` is a callback function which is invoked + * when a packet containing DATAGRAM frame is declared lost. This + * callback function is optional. + */ + ngtcp2_lost_datagram lost_datagram; + /** + * :member:`get_path_challenge_data` is a callback function which is + * invoked when the library needs new PATH_CHALLENGE data. This + * callback must be specified. + */ + ngtcp2_get_path_challenge_data get_path_challenge_data; + /** + * :member:`stream_stop_sending` is a callback function which is + * invoked when a local endpoint no longer reads from a stream + * before it receives all stream data. This callback function is + * optional. + */ + ngtcp2_stream_stop_sending stream_stop_sending; + /** + * :member:`version_negotiation` is a callback function which is + * invoked when the compatible version negotiation takes place. + * This callback function must be specified. + */ + ngtcp2_version_negotiation version_negotiation; + /** + * :member:`recv_rx_key` is a callback function which is invoked + * when a new key for decrypting packets is installed during QUIC + * cryptographic handshake. It is not called for + * :enum:`ngtcp2_crypto_level.NGTCP2_CRYPTO_LEVEL_INITIAL`. + */ + ngtcp2_recv_key recv_rx_key; + /** + * :member:`recv_tx_key` is a callback function which is invoked + * when a new key for encrypting packets is installed during QUIC + * cryptographic handshake. It is not called for + * :enum:`ngtcp2_crypto_level.NGTCP2_CRYPTO_LEVEL_INITIAL`. + */ + ngtcp2_recv_key recv_tx_key; + /** + * :member:`ngtcp2_early_data_rejected` is a callback function which + * is invoked when an attempt to send early data by client was + * rejected by server, or client decided not to attempt early data. + * This callback function is only used by client. + */ + ngtcp2_early_data_rejected early_data_rejected; +} ngtcp2_callbacks; + +/** + * @function + * + * `ngtcp2_pkt_write_connection_close` writes Initial packet + * containing CONNECTION_CLOSE frame with the given |error_code| and + * the optional |reason| of length |reasonlen| to the buffer pointed + * by |dest| of length |destlen|. All encryption parameters are for + * Initial packet encryption. The packet number is always 0. + * + * The primary use case of this function is for server to send + * CONNECTION_CLOSE frame in Initial packet to close connection + * without committing the state when validating Retry token fails. + * + * This function returns the number of bytes written if it succeeds, + * or one of the following negative error codes: + * + * :macro:`NGTCP2_ERR_NOBUF` + * Buffer is too small. + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` + * Callback function failed. + */ +NGTCP2_EXTERN ngtcp2_ssize ngtcp2_pkt_write_connection_close( + uint8_t *dest, size_t destlen, uint32_t version, const ngtcp2_cid *dcid, + const ngtcp2_cid *scid, uint64_t error_code, const uint8_t *reason, + size_t reasonlen, ngtcp2_encrypt encrypt, const ngtcp2_crypto_aead *aead, + const ngtcp2_crypto_aead_ctx *aead_ctx, const uint8_t *iv, + ngtcp2_hp_mask hp_mask, const ngtcp2_crypto_cipher *hp, + const ngtcp2_crypto_cipher_ctx *hp_ctx); + +/** + * @function + * + * `ngtcp2_pkt_write_retry` writes Retry packet in the buffer pointed + * by |dest| whose length is |destlen|. |dcid| is the destination + * connection ID which appeared in a packet as a source connection ID + * sent by client. |scid| is a server chosen source connection ID. + * |odcid| specifies Original Destination Connection ID which appeared + * in a packet as a destination connection ID sent by client. |token| + * specifies Retry Token, and |tokenlen| specifies its length. |aead| + * must be AEAD_AES_128_GCM. |aead_ctx| must be initialized with + * :macro:`NGTCP2_RETRY_KEY` as an encryption key. + * + * This function returns the number of bytes written to the buffer, or + * one of the following negative error codes: + * + * :macro:`NGTCP2_ERR_NOBUF` + * Buffer is too small. + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` + * Callback function failed. + * :macro:`NGTCP2_ERR_INVALID_ARGUMENT` + * :member:`odcid->datalen <ngtcp2_cid.datalen>` is less than + * :macro:`NGTCP2_MIN_INITIAL_DCIDLEN`. + */ +NGTCP2_EXTERN ngtcp2_ssize ngtcp2_pkt_write_retry( + uint8_t *dest, size_t destlen, uint32_t version, const ngtcp2_cid *dcid, + const ngtcp2_cid *scid, const ngtcp2_cid *odcid, const uint8_t *token, + size_t tokenlen, ngtcp2_encrypt encrypt, const ngtcp2_crypto_aead *aead, + const ngtcp2_crypto_aead_ctx *aead_ctx); + +/** + * @function + * + * `ngtcp2_accept` is used by server implementation, and decides + * whether packet |pkt| of length |pktlen| from client is acceptable + * for the very initial packet to a connection. + * + * If |dest| is not ``NULL`` and the function returns 0, or + * :macro:`NGTCP2_ERR_RETRY`, the decoded packet header is stored to + * the object pointed by |dest|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * :macro:`NGTCP2_ERR_RETRY` + * Retry packet should be sent. + * :macro:`NGTCP2_ERR_INVALID_ARGUMENT` + * The packet is not acceptable for the very first packet to a new + * connection; or the function failed to parse the packet header. + */ +NGTCP2_EXTERN int ngtcp2_accept(ngtcp2_pkt_hd *dest, const uint8_t *pkt, + size_t pktlen); + +/** + * @function + * + * `ngtcp2_conn_client_new` creates new :type:`ngtcp2_conn`, and + * initializes it as client. |dcid| is randomized destination + * connection ID. |scid| is source connection ID. + * |client_chosen_version| is a QUIC version that a client chooses. + * |path| is the network path where this QUIC connection is being + * established and must not be ``NULL``. |callbacks|, |settings|, and + * |params| must not be ``NULL``, and the function make a copy of each + * of them. |params| is local QUIC transport parameters and sent to a + * remote endpoint during handshake. |user_data| is the arbitrary + * pointer which is passed to the user-defined callback functions. If + * |mem| is ``NULL``, the memory allocator returned by + * `ngtcp2_mem_default()` is used. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * :macro:`NGTCP2_ERR_NOMEM` + * Out of memory. + */ +NGTCP2_EXTERN int ngtcp2_conn_client_new_versioned( + ngtcp2_conn **pconn, const ngtcp2_cid *dcid, const ngtcp2_cid *scid, + const ngtcp2_path *path, uint32_t client_chosen_version, + int callbacks_version, const ngtcp2_callbacks *callbacks, + int settings_version, const ngtcp2_settings *settings, + int transport_params_version, const ngtcp2_transport_params *params, + const ngtcp2_mem *mem, void *user_data); + +/** + * @function + * + * `ngtcp2_conn_server_new` creates new :type:`ngtcp2_conn`, and + * initializes it as server. |dcid| is a destination connection ID. + * |scid| is a source connection ID. |path| is the network path where + * this QUIC connection is being established and must not be ``NULL``. + * |client_chosen_version| is a QUIC version that a client chooses. + * |callbacks|, |settings|, and |params| must not be ``NULL``, and the + * function make a copy of each of them. |params| is local QUIC + * transport parameters and sent to a remote endpoint during + * handshake. |user_data| is the arbitrary pointer which is passed to + * the user-defined callback functions. If |mem| is ``NULL``, the + * memory allocator returned by `ngtcp2_mem_default()` is used. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * :macro:`NGTCP2_ERR_NOMEM` + * Out of memory. + */ +NGTCP2_EXTERN int ngtcp2_conn_server_new_versioned( + ngtcp2_conn **pconn, const ngtcp2_cid *dcid, const ngtcp2_cid *scid, + const ngtcp2_path *path, uint32_t client_chosen_version, + int callbacks_version, const ngtcp2_callbacks *callbacks, + int settings_version, const ngtcp2_settings *settings, + int transport_params_version, const ngtcp2_transport_params *params, + const ngtcp2_mem *mem, void *user_data); + +/** + * @function + * + * `ngtcp2_conn_del` frees resources allocated for |conn|. It also + * frees memory pointed by |conn|. + */ +NGTCP2_EXTERN void ngtcp2_conn_del(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_read_pkt` decrypts QUIC packet given in |pkt| of + * length |pktlen| and processes it. |path| is the network path the + * packet is delivered and must not be ``NULL``. |pi| is packet + * metadata and may be ``NULL``. This function performs QUIC handshake + * as well. + * + * This function must not be called from inside the callback + * functions. + * + * This function returns 0 if it succeeds, or negative error codes. + * If :macro:`NGTCP2_ERR_RETRY` is returned, application must be a + * server and it must perform address validation by sending Retry + * packet and discard the connection state. If + * :macro:`NGTCP2_ERR_DROP_CONN` is returned, server application must + * drop the connection silently (without sending any CONNECTION_CLOSE + * frame) and discard connection state. If + * :macro:`NGTCP2_ERR_DRAINING` is returned, a connection has entered + * the draining state, and no further packet transmission is allowed. + * If :macro:`NGTCP2_ERR_CRYPTO` is returned, the error happened in + * TLS stack and `ngtcp2_conn_get_tls_alert` returns TLS alert if set. + * + * If any other negative errors are returned, call + * `ngtcp2_conn_write_connection_close` to get terminal packet, and + * sending it makes QUIC connection enter the closing state. + */ +NGTCP2_EXTERN int +ngtcp2_conn_read_pkt_versioned(ngtcp2_conn *conn, const ngtcp2_path *path, + int pkt_info_version, const ngtcp2_pkt_info *pi, + const uint8_t *pkt, size_t pktlen, + ngtcp2_tstamp ts); + +/** + * @function + * + * `ngtcp2_conn_write_pkt` is equivalent to calling + * `ngtcp2_conn_writev_stream` with -1 as stream_id, no stream data, and + * :macro:`NGTCP2_WRITE_STREAM_FLAG_NONE` as flags. + */ +NGTCP2_EXTERN ngtcp2_ssize ngtcp2_conn_write_pkt_versioned( + ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version, + ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, ngtcp2_tstamp ts); + +/** + * @function + * + * `ngtcp2_conn_handshake_completed` tells |conn| that the TLS stack + * declares TLS handshake completion. This does not mean QUIC + * handshake has completed. The library needs extra conditions to be + * met. + */ +NGTCP2_EXTERN void ngtcp2_conn_handshake_completed(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_get_handshake_completed` returns nonzero if QUIC handshake + * has completed. + */ +NGTCP2_EXTERN int ngtcp2_conn_get_handshake_completed(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_install_initial_key` installs packet protection keying + * materials for Initial packets. |rx_aead_ctx| is AEAD cipher + * context object and must be initialized with a decryption key. + * |rx_iv| is IV of length |rx_ivlen| for decryption. |rx_hp_ctx| is + * a packet header protection cipher context object for decryption. + * Similarly, |tx_aead_ctx|, |tx_iv| and |tx_hp_ctx| are for + * encrypting outgoing packets and are the same length with the + * decryption counterpart . If they have already been set, they are + * overwritten. + * + * |ivlen| must be the minimum length of AEAD nonce, or 8 bytes if + * that is larger. + * + * If this function succeeds, |conn| takes ownership of |rx_aead_ctx|, + * |rx_hp_ctx|, |tx_aead_ctx|, and |tx_hp_ctx|. + * :type:`ngtcp2_delete_crypto_aead_ctx` and + * :type:`ngtcp2_delete_crypto_cipher_ctx` will be called to delete + * these objects when they are no longer used. If this function + * fails, the caller is responsible to delete them. + * + * After receiving Retry packet, the DCID most likely changes. In + * that case, client application must generate these keying materials + * again based on new DCID and install them again. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * :macro:`NGTCP2_ERR_NOMEM` + * Out of memory. + */ +NGTCP2_EXTERN int ngtcp2_conn_install_initial_key( + ngtcp2_conn *conn, const ngtcp2_crypto_aead_ctx *rx_aead_ctx, + const uint8_t *rx_iv, const ngtcp2_crypto_cipher_ctx *rx_hp_ctx, + const ngtcp2_crypto_aead_ctx *tx_aead_ctx, const uint8_t *tx_iv, + const ngtcp2_crypto_cipher_ctx *tx_hp_ctx, size_t ivlen); + +/** + * @function + * + * `ngtcp2_conn_install_vneg_initial_key` installs packet protection + * keying materials for Initial packets on compatible version + * negotiation for |version|. |rx_aead_ctx| is AEAD cipher context + * object and must be initialized with a decryption key. |rx_iv| is + * IV of length |rx_ivlen| for decryption. |rx_hp_ctx| is a packet + * header protection cipher context object for decryption. Similarly, + * |tx_aead_ctx|, |tx_iv| and |tx_hp_ctx| are for encrypting outgoing + * packets and are the same length with the decryption counterpart . + * If they have already been set, they are overwritten. + * + * |ivlen| must be the minimum length of AEAD nonce, or 8 bytes if + * that is larger. + * + * If this function succeeds, |conn| takes ownership of |rx_aead_ctx|, + * |rx_hp_ctx|, |tx_aead_ctx|, and |tx_hp_ctx|. + * :type:`ngtcp2_delete_crypto_aead_ctx` and + * :type:`ngtcp2_delete_crypto_cipher_ctx` will be called to delete + * these objects when they are no longer used. If this function + * fails, the caller is responsible to delete them. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * :macro:`NGTCP2_ERR_NOMEM` + * Out of memory. + */ +NGTCP2_EXTERN int ngtcp2_conn_install_vneg_initial_key( + ngtcp2_conn *conn, uint32_t version, + const ngtcp2_crypto_aead_ctx *rx_aead_ctx, const uint8_t *rx_iv, + const ngtcp2_crypto_cipher_ctx *rx_hp_ctx, + const ngtcp2_crypto_aead_ctx *tx_aead_ctx, const uint8_t *tx_iv, + const ngtcp2_crypto_cipher_ctx *tx_hp_ctx, size_t ivlen); + +/** + * @function + * + * `ngtcp2_conn_install_rx_handshake_key` installs packet protection + * keying materials for decrypting incoming Handshake packets. + * |aead_ctx| is AEAD cipher context object which must be initialized + * with a decryption key. |iv| is IV of length |ivlen|. |hp_ctx| is + * a packet header protection cipher context object. + * + * |ivlen| must be the minimum length of AEAD nonce, or 8 bytes if + * that is larger. + * + * If this function succeeds, |conn| takes ownership of |aead_ctx|, + * and |hp_ctx|. :type:`ngtcp2_delete_crypto_aead_ctx` and + * :type:`ngtcp2_delete_crypto_cipher_ctx` will be called to delete + * these objects when they are no longer used. If this function + * fails, the caller is responsible to delete them. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * :macro:`NGTCP2_ERR_NOMEM` + * Out of memory. + */ +NGTCP2_EXTERN int ngtcp2_conn_install_rx_handshake_key( + ngtcp2_conn *conn, const ngtcp2_crypto_aead_ctx *aead_ctx, + const uint8_t *iv, size_t ivlen, const ngtcp2_crypto_cipher_ctx *hp_ctx); + +/** + * @function + * + * `ngtcp2_conn_install_tx_handshake_key` installs packet protection + * keying materials for encrypting outgoing Handshake packets. + * |aead_ctx| is AEAD cipher context object which must be initialized + * with an encryption key. |iv| is IV of length |ivlen|. |hp_ctx| is + * a packet header protection cipher context object. + * + * |ivlen| must be the minimum length of AEAD nonce, or 8 bytes if + * that is larger. + * + * If this function succeeds, |conn| takes ownership of |aead_ctx| and + * |hp_ctx|. :type:`ngtcp2_delete_crypto_aead_ctx` and + * :type:`ngtcp2_delete_crypto_cipher_ctx` will be called to delete + * these objects when they are no longer used. If this function + * fails, the caller is responsible to delete them. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * :macro:`NGTCP2_ERR_NOMEM` + * Out of memory. + */ +NGTCP2_EXTERN int ngtcp2_conn_install_tx_handshake_key( + ngtcp2_conn *conn, const ngtcp2_crypto_aead_ctx *aead_ctx, + const uint8_t *iv, size_t ivlen, const ngtcp2_crypto_cipher_ctx *hp_ctx); + +/** + * @function + * + * `ngtcp2_conn_install_early_key` installs packet protection AEAD + * cipher context object |aead_ctx|, IV |iv| of length |ivlen|, and + * packet header protection cipher context object |hp_ctx| to encrypt + * (for client) or decrypt (for server) 0RTT packets. + * + * |ivlen| must be the minimum length of AEAD nonce, or 8 bytes if + * that is larger. + * + * If this function succeeds, |conn| takes ownership of |aead_ctx| and + * |hp_ctx|. :type:`ngtcp2_delete_crypto_aead_ctx` and + * :type:`ngtcp2_delete_crypto_cipher_ctx` will be called to delete + * these objects when they are no longer used. If this function + * fails, the caller is responsible to delete them. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * :macro:`NGTCP2_ERR_NOMEM` + * Out of memory. + */ +NGTCP2_EXTERN int ngtcp2_conn_install_early_key( + ngtcp2_conn *conn, const ngtcp2_crypto_aead_ctx *aead_ctx, + const uint8_t *iv, size_t ivlen, const ngtcp2_crypto_cipher_ctx *hp_ctx); + +/** + * @function + * + * `ngtcp2_conn_install_rx_key` installs packet protection keying + * materials for decrypting Short header packets. |secret| of length + * |secretlen| is the decryption secret which is used to derive keying + * materials passed to this function. |aead_ctx| is AEAD cipher + * context object which must be initialized with a decryption key. + * |iv| is IV of length |ivlen|. |hp_ctx| is a packet header + * protection cipher context object. + * + * |ivlen| must be the minimum length of AEAD nonce, or 8 bytes if + * that is larger. + * + * If this function succeeds, |conn| takes ownership of |aead_ctx| and + * |hp_ctx|. :type:`ngtcp2_delete_crypto_aead_ctx` and + * :type:`ngtcp2_delete_crypto_cipher_ctx` will be called to delete + * these objects when they are no longer used. If this function + * fails, the caller is responsible to delete them. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * :macro:`NGTCP2_ERR_NOMEM` + * Out of memory. + */ +NGTCP2_EXTERN int ngtcp2_conn_install_rx_key( + ngtcp2_conn *conn, const uint8_t *secret, size_t secretlen, + const ngtcp2_crypto_aead_ctx *aead_ctx, const uint8_t *iv, size_t ivlen, + const ngtcp2_crypto_cipher_ctx *hp_ctx); + +/** + * @function + * + * `ngtcp2_conn_install_tx_key` installs packet protection keying + * materials for encrypting Short header packets. |secret| of length + * |secretlen| is the encryption secret which is used to derive keying + * materials passed to this function. |aead_ctx| is AEAD cipher + * context object which must be initialized with an encryption key. + * |iv| is IV of length |ivlen|. |hp_ctx| is a packet header + * protection cipher context object. + * + * |ivlen| must be the minimum length of AEAD nonce, or 8 bytes if + * that is larger. + * + * If this function succeeds, |conn| takes ownership of |aead_ctx| and + * |hp_ctx|. :type:`ngtcp2_delete_crypto_aead_ctx` and + * :type:`ngtcp2_delete_crypto_cipher_ctx` will be called to delete + * these objects when they are no longer used. If this function + * fails, the caller is responsible to delete them. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * :macro:`NGTCP2_ERR_NOMEM` + * Out of memory. + */ +NGTCP2_EXTERN int ngtcp2_conn_install_tx_key( + ngtcp2_conn *conn, const uint8_t *secret, size_t secretlen, + const ngtcp2_crypto_aead_ctx *aead_ctx, const uint8_t *iv, size_t ivlen, + const ngtcp2_crypto_cipher_ctx *hp_ctx); + +/** + * @function + * + * `ngtcp2_conn_initiate_key_update` initiates the key update. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * :macro:`NGTCP2_ERR_INVALID_STATE` + * The previous key update has not been confirmed yet; or key + * update is too frequent; or new keys are not available yet. + */ +NGTCP2_EXTERN int ngtcp2_conn_initiate_key_update(ngtcp2_conn *conn, + ngtcp2_tstamp ts); + +/** + * @function + * + * `ngtcp2_conn_set_tls_error` sets the TLS related error |liberr| in + * |conn|. |liberr| must be one of ngtcp2 library error codes (which + * is defined as NGTCP2_ERR_* macro, such as + * :macro:`NGTCP2_ERR_DECRYPT`). In general, error code should be + * propagated via return value, but sometimes ngtcp2 API is called + * inside callback function of TLS stack and it does not allow to + * return ngtcp2 error code directly. In this case, implementation + * can set the error code (e.g., + * :macro:`NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM`) using this function. + */ +NGTCP2_EXTERN void ngtcp2_conn_set_tls_error(ngtcp2_conn *conn, int liberr); + +/** + * @function + * + * `ngtcp2_conn_get_tls_error` returns the value set by + * `ngtcp2_conn_set_tls_error`. If no value is set, this function + * returns 0. + */ +NGTCP2_EXTERN int ngtcp2_conn_get_tls_error(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_set_tls_alert` sets a TLS alert |alert| generated by a + * local endpoint to |conn|. + */ +NGTCP2_EXTERN void ngtcp2_conn_set_tls_alert(ngtcp2_conn *conn, uint8_t alert); + +/** + * @function + * + * `ngtcp2_conn_get_tls_alert` returns the value set by + * `ngtcp2_conn_set_tls_alert`. If no value is set, this function + * returns 0. + */ +NGTCP2_EXTERN uint8_t ngtcp2_conn_get_tls_alert(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_set_keep_alive_timeout` sets keep-alive timeout. If + * nonzero value is given, after a connection is idle at least in a + * given amount of time, a keep-alive packet is sent. If 0 is set, + * keep-alive functionality is disabled and this is the default. + */ +NGTCP2_EXTERN void ngtcp2_conn_set_keep_alive_timeout(ngtcp2_conn *conn, + ngtcp2_duration timeout); + +/** + * @function + * + * `ngtcp2_conn_get_expiry` returns the next expiry time. It returns + * ``UINT64_MAX`` if there is no next expiry. + * + * Call `ngtcp2_conn_handle_expiry()` and `ngtcp2_conn_write_pkt` (or + * `ngtcp2_conn_writev_stream`) if expiry time is passed. + */ +NGTCP2_EXTERN ngtcp2_tstamp ngtcp2_conn_get_expiry(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_handle_expiry` handles expired timer. It does nothing + * if timer is not expired. + */ +NGTCP2_EXTERN int ngtcp2_conn_handle_expiry(ngtcp2_conn *conn, + ngtcp2_tstamp ts); + +/** + * @function + * + * `ngtcp2_conn_get_pto` returns Probe Timeout (PTO). + */ +NGTCP2_EXTERN ngtcp2_duration ngtcp2_conn_get_pto(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_decode_remote_transport_params` decodes QUIC transport + * parameters from the buffer pointed by |data| of length |datalen|, + * and sets the result to |conn|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * :macro:`NGTCP2_ERR_REQUIRED_TRANSPORT_PARAM` + * The required parameter is missing. + * :macro:`NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM` + * The input is malformed. + * :macro:`NGTCP2_ERR_TRANSPORT_PARAM` + * Failed to validate the remote QUIC transport parameters. + * :macro:`NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE` + * Version negotiation failure. + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` + * User callback failed + */ +NGTCP2_EXTERN int +ngtcp2_conn_decode_remote_transport_params(ngtcp2_conn *conn, + const uint8_t *data, size_t datalen); + +/** + * @function + * + * `ngtcp2_conn_get_remote_transport_params` returns a pointer to the + * remote QUIC transport parameters. If no remote transport + * parameters are set, it returns NULL. + */ +NGTCP2_EXTERN const ngtcp2_transport_params * +ngtcp2_conn_get_remote_transport_params(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_set_early_remote_transport_params` sets |params| as + * transport parameters previously received from a server. The + * parameters are used to send early data. QUIC requires that client + * application should remember transport parameters along with a + * session ticket. + * + * At least following fields should be set: + * + * - initial_max_stream_id_bidi + * - initial_max_stream_id_uni + * - initial_max_stream_data_bidi_local + * - initial_max_stream_data_bidi_remote + * - initial_max_stream_data_uni + * - initial_max_data + * - active_connection_id_limit + * - max_datagram_frame_size (if DATAGRAM extension was negotiated) + * + * The following fields are ignored: + * + * - ack_delay_exponent + * - max_ack_delay + * - initial_scid + * - original_dcid + * - preferred_address and preferred_address_present + * - retry_scid and retry_scid_present + * - stateless_reset_token and stateless_reset_token_present + */ +NGTCP2_EXTERN void ngtcp2_conn_set_early_remote_transport_params_versioned( + ngtcp2_conn *conn, int transport_params_version, + const ngtcp2_transport_params *params); + +/** + * @function + * + * `ngtcp2_conn_set_local_transport_params` sets the local transport + * parameters |params|. This function can only be called by server. + * Although the local transport parameters are passed to + * `ngtcp2_conn_server_new`, server might want to update them after + * ALPN is chosen. In that case, server can update the transport + * parameter with this function. Server must call this function + * before calling `ngtcp2_conn_install_tx_handshake_key`. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * :macro:`NGTCP2_ERR_INVALID_STATE` + * `ngtcp2_conn_install_tx_handshake_key` has been called. + */ +NGTCP2_EXTERN int ngtcp2_conn_set_local_transport_params_versioned( + ngtcp2_conn *conn, int transport_params_version, + const ngtcp2_transport_params *params); + +/** + * @function + * + * `ngtcp2_conn_get_local_transport_params` returns a pointer to the + * local QUIC transport parameters. + */ +NGTCP2_EXTERN const ngtcp2_transport_params * +ngtcp2_conn_get_local_transport_params(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_encode_local_transport_params` encodes the local QUIC + * transport parameters in |dest| of length |destlen|. This is + * equivalent to calling `ngtcp2_conn_get_local_transport_params` and + * then `ngtcp2_encode_transport_params`. + * + * This function returns the number of written, or one of the + * following negative error codes: + * + * :macro:`NGTCP2_ERR_NOBUF` + * Buffer is too small. + */ +NGTCP2_EXTERN ngtcp2_ssize ngtcp2_conn_encode_local_transport_params( + ngtcp2_conn *conn, uint8_t *dest, size_t destlen); + +/** + * @function + * + * `ngtcp2_conn_open_bidi_stream` opens new bidirectional stream. The + * |stream_user_data| is the user data specific to the stream. The + * open stream ID is stored in |*pstream_id|. + * + * Application can call this function before handshake completes. For + * 0RTT packet, application can call this function after calling + * `ngtcp2_conn_set_early_remote_transport_params`. For 1RTT packet, + * application can call this function after calling + * `ngtcp2_conn_decode_remote_transport_params` and + * `ngtcp2_conn_install_tx_key`. If ngtcp2 crypto support library is + * used, application can call this function after calling + * `ngtcp2_crypto_derive_and_install_tx_key` for 1RTT packet. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * :macro:`NGTCP2_ERR_NOMEM` + * Out of memory + * :macro:`NGTCP2_ERR_STREAM_ID_BLOCKED` + * The remote peer does not allow |stream_id| yet. + */ +NGTCP2_EXTERN int ngtcp2_conn_open_bidi_stream(ngtcp2_conn *conn, + int64_t *pstream_id, + void *stream_user_data); + +/** + * @function + * + * `ngtcp2_conn_open_uni_stream` opens new unidirectional stream. The + * |stream_user_data| is the user data specific to the stream. The + * open stream ID is stored in |*pstream_id|. + * + * Application can call this function before handshake completes. For + * 0RTT packet, application can call this function after calling + * `ngtcp2_conn_set_early_remote_transport_params`. For 1RTT packet, + * application can call this function after calling + * `ngtcp2_conn_decode_remote_transport_params` and + * `ngtcp2_conn_install_tx_key`. If ngtcp2 crypto support library is + * used, application can call this function after calling + * `ngtcp2_crypto_derive_and_install_tx_key` for 1RTT packet. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * :macro:`NGTCP2_ERR_NOMEM` + * Out of memory + * :macro:`NGTCP2_ERR_STREAM_ID_BLOCKED` + * The remote peer does not allow |stream_id| yet. + */ +NGTCP2_EXTERN int ngtcp2_conn_open_uni_stream(ngtcp2_conn *conn, + int64_t *pstream_id, + void *stream_user_data); + +/** + * @function + * + * `ngtcp2_conn_shutdown_stream` closes stream denoted by |stream_id| + * abruptly. |app_error_code| is one of application error codes, and + * indicates the reason of shutdown. Successful call of this function + * does not immediately erase the state of the stream. The actual + * deletion is done when the remote endpoint sends acknowledgement. + * Calling this function is equivalent to call + * `ngtcp2_conn_shutdown_stream_read`, and + * `ngtcp2_conn_shutdown_stream_write` sequentially with the following + * differences. If |stream_id| refers to a local unidirectional + * stream, this function only shutdowns write side of the stream. If + * |stream_id| refers to a remote unidirectional stream, this function + * only shutdowns read side of the stream. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * :macro:`NGTCP2_ERR_NOMEM` + * Out of memory + */ +NGTCP2_EXTERN int ngtcp2_conn_shutdown_stream(ngtcp2_conn *conn, + int64_t stream_id, + uint64_t app_error_code); + +/** + * @function + * + * `ngtcp2_conn_shutdown_stream_write` closes write-side of stream + * denoted by |stream_id| abruptly. |app_error_code| is one of + * application error codes, and indicates the reason of shutdown. If + * this function succeeds, no application data is sent to the remote + * endpoint. It discards all data which has not been acknowledged + * yet. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * :macro:`NGTCP2_ERR_NOMEM` + * Out of memory + * :macro:`NGTCP2_ERR_INVALID_ARGUMENT` + * |stream_id| refers to a remote unidirectional stream. + */ +NGTCP2_EXTERN int ngtcp2_conn_shutdown_stream_write(ngtcp2_conn *conn, + int64_t stream_id, + uint64_t app_error_code); + +/** + * @function + * + * `ngtcp2_conn_shutdown_stream_read` closes read-side of stream + * denoted by |stream_id| abruptly. |app_error_code| is one of + * application error codes, and indicates the reason of shutdown. If + * this function succeeds, no application data is forwarded to an + * application layer. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * :macro:`NGTCP2_ERR_NOMEM` + * Out of memory + * :macro:`NGTCP2_ERR_INVALID_ARGUMENT` + * |stream_id| refers to a local unidirectional stream. + */ +NGTCP2_EXTERN int ngtcp2_conn_shutdown_stream_read(ngtcp2_conn *conn, + int64_t stream_id, + uint64_t app_error_code); + +/** + * @macrosection + * + * Write stream data flags + */ + +/** + * @macro + * + * :macro:`NGTCP2_WRITE_STREAM_FLAG_NONE` indicates no flag set. + */ +#define NGTCP2_WRITE_STREAM_FLAG_NONE 0x00u + +/** + * @macro + * + * :macro:`NGTCP2_WRITE_STREAM_FLAG_MORE` indicates that more data may + * come and should be coalesced into the same packet if possible. + */ +#define NGTCP2_WRITE_STREAM_FLAG_MORE 0x01u + +/** + * @macro + * + * :macro:`NGTCP2_WRITE_STREAM_FLAG_FIN` indicates that the passed + * data is the final part of a stream. + */ +#define NGTCP2_WRITE_STREAM_FLAG_FIN 0x02u + +/** + * @function + * + * `ngtcp2_conn_write_stream` is just like + * `ngtcp2_conn_writev_stream`. The only difference is that it + * conveniently accepts a single buffer. + */ +NGTCP2_EXTERN ngtcp2_ssize ngtcp2_conn_write_stream_versioned( + ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version, + ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, ngtcp2_ssize *pdatalen, + uint32_t flags, int64_t stream_id, const uint8_t *data, size_t datalen, + ngtcp2_tstamp ts); + +/** + * @function + * + * `ngtcp2_conn_writev_stream` writes a packet containing stream data + * of stream denoted by |stream_id|. The buffer of the packet is + * pointed by |dest| of length |destlen|. This function performs QUIC + * handshake as well. + * + * |destlen| should be at least + * :member:`ngtcp2_settings.max_tx_udp_payload_size`. + * + * Specifying -1 to |stream_id| means no new stream data to send. + * + * If |path| is not ``NULL``, this function stores the network path + * with which the packet should be sent. Each addr field must point + * to the buffer which should be at least ``sizeof(struct + * sockaddr_storage)`` bytes long. The assignment might not be done + * if nothing is written to |dest|. + * + * If |pi| is not ``NULL``, this function stores packet metadata in it + * if it succeeds. The metadata includes ECN markings. When calling + * this function again after it returns + * :macro:`NGTCP2_ERR_WRITE_MORE`, caller must pass the same |pi| to + * this function. + * + * If the all given data is encoded as STREAM frame in |dest|, and if + * |flags| & :macro:`NGTCP2_WRITE_STREAM_FLAG_FIN` is nonzero, fin + * flag is set to outgoing STREAM frame. Otherwise, fin flag in + * STREAM frame is not set. + * + * This packet may contain frames other than STREAM frame. The packet + * might not contain STREAM frame if other frames occupy the packet. + * In that case, |*pdatalen| would be -1 if |pdatalen| is not + * ``NULL``. + * + * If |flags| & :macro:`NGTCP2_WRITE_STREAM_FLAG_FIN` is nonzero, and + * 0 length STREAM frame is successfully serialized, |*pdatalen| would + * be 0. + * + * The number of data encoded in STREAM frame is stored in |*pdatalen| + * if it is not ``NULL``. The caller must keep the portion of data + * covered by |*pdatalen| bytes in tact until + * :type:`ngtcp2_acked_stream_data_offset` indicates that they are + * acknowledged by a remote endpoint or the stream is closed. + * + * If |flags| equals to :macro:`NGTCP2_WRITE_STREAM_FLAG_NONE`, this + * function produces a single payload of UDP packet. If the given + * stream data is small (e.g., few bytes), the packet might be + * severely under filled. Too many small packet might increase + * overall packet processing costs. Unless there are retransmissions, + * by default, application can only send 1 STREAM frame in one QUIC + * packet. In order to include more than 1 STREAM frame in one QUIC + * packet, specify :macro:`NGTCP2_WRITE_STREAM_FLAG_MORE` in |flags|. + * This is analogous to ``MSG_MORE`` flag in :manpage:`send(2)`. If + * the :macro:`NGTCP2_WRITE_STREAM_FLAG_MORE` is used, there are 4 + * outcomes: + * + * - The function returns the written length of packet just like + * without :macro:`NGTCP2_WRITE_STREAM_FLAG_MORE`. This is because + * packet is nearly full and the library decided to make a complete + * packet. |*pdatalen| might be -1 or >= 0. It may return 0 which + * indicates that no packet transmission is possible at the moment + * for some reason. + * + * - The function returns :macro:`NGTCP2_ERR_WRITE_MORE`. In this + * case, |*pdatalen| >= 0 is asserted. It indicates that + * application can still call this function with different stream + * data (or `ngtcp2_conn_writev_datagram` if it has data to send in + * unreliable datagram) to pack them into the same packet. + * Application has to specify the same |conn|, |path|, |pi|, |dest|, + * |destlen|, and |ts| parameters, otherwise the behaviour is + * undefined. The application can change |flags|. + * + * - The function returns one of the following negative error codes: + * :macro:`NGTCP2_ERR_STREAM_DATA_BLOCKED`, + * :macro:`NGTCP2_ERR_STREAM_NOT_FOUND`, + * :macro:`NGTCP2_ERR_STREAM_SHUT_WR`. In this case, |*pdatalen| == + * -1 is asserted. Application can still write the stream data of + * the other streams by calling this function (or + * `ngtcp2_conn_writev_datagram` if it has data to send in + * unreliable datagram) to pack them into the same packet. + * Application has to specify the same |conn|, |path|, |pi|, |dest|, + * |destlen|, and |ts| parameters, otherwise the behaviour is + * undefined. The application can change |flags|. + * + * - The other negative error codes might be returned just like + * without :macro:`NGTCP2_WRITE_STREAM_FLAG_MORE`. These errors + * should be treated as a connection error. + * + * When application uses :macro:`NGTCP2_WRITE_STREAM_FLAG_MORE` at + * least once, it must not call other ngtcp2 API functions + * (application can still call `ngtcp2_conn_write_connection_close` to + * handle error from this function), just keep calling this function + * (or `ngtcp2_conn_write_pkt`, or `ngtcp2_conn_writev_datagram`) + * until it returns 0, a positive number (which indicates a complete + * packet is ready), or the error codes other than + * :macro:`NGTCP2_ERR_WRITE_MORE`, + * :macro:`NGTCP2_ERR_STREAM_DATA_BLOCKED`, + * :macro:`NGTCP2_ERR_STREAM_NOT_FOUND`, and + * :macro:`NGTCP2_ERR_STREAM_SHUT_WR`. If there is no stream data to + * include, call this function with |stream_id| as -1 to stop + * coalescing and write a packet. + * + * This function returns 0 if it cannot write any frame because buffer + * is too small, or packet is congestion limited. Application should + * keep reading and wait for congestion window to grow. + * + * This function must not be called from inside the callback + * functions. + * + * `ngtcp2_conn_update_pkt_tx_time` must be called after this + * function. Application may call this function multiple times before + * calling `ngtcp2_conn_update_pkt_tx_time`. + * + * This function returns the number of bytes written in |dest| if it + * succeeds, or one of the following negative error codes: + * + * :macro:`NGTCP2_ERR_NOMEM` + * Out of memory + * :macro:`NGTCP2_ERR_STREAM_NOT_FOUND` + * Stream does not exist + * :macro:`NGTCP2_ERR_STREAM_SHUT_WR` + * Stream is half closed (local); or stream is being reset. + * :macro:`NGTCP2_ERR_PKT_NUM_EXHAUSTED` + * Packet number is exhausted, and cannot send any more packet. + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` + * User callback failed + * :macro:`NGTCP2_ERR_INVALID_ARGUMENT` + * The total length of stream data is too large. + * :macro:`NGTCP2_ERR_STREAM_DATA_BLOCKED` + * Stream is blocked because of flow control. + * :macro:`NGTCP2_ERR_WRITE_MORE` + * (Only when :macro:`NGTCP2_WRITE_STREAM_FLAG_MORE` is specified) + * Application can call this function to pack more stream data + * into the same packet. See above to know how it works. + * + * In general, if the error code which satisfies + * `ngtcp2_err_is_fatal(err) <ngtcp2_err_is_fatal>` != 0 is returned, + * the application should just close the connection by calling + * `ngtcp2_conn_write_connection_close` or just delete the QUIC + * connection using `ngtcp2_conn_del`. It is undefined to call the + * other library functions. + */ +NGTCP2_EXTERN ngtcp2_ssize ngtcp2_conn_writev_stream_versioned( + ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version, + ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, ngtcp2_ssize *pdatalen, + uint32_t flags, int64_t stream_id, const ngtcp2_vec *datav, size_t datavcnt, + ngtcp2_tstamp ts); + +/** + * @macrosection + * + * Write datagram flags + */ + +/** + * @macro + * + * :macro:`NGTCP2_WRITE_DATAGRAM_FLAG_NONE` indicates no flag set. + */ +#define NGTCP2_WRITE_DATAGRAM_FLAG_NONE 0x00u + +/** + * @macro + * + * :macro:`NGTCP2_WRITE_DATAGRAM_FLAG_MORE` indicates that more data + * may come and should be coalesced into the same packet if possible. + */ +#define NGTCP2_WRITE_DATAGRAM_FLAG_MORE 0x01u + +/** + * @function + * + * `ngtcp2_conn_writev_datagram` writes a packet containing unreliable + * data in DATAGRAM frame. The buffer of the packet is pointed by + * |dest| of length |destlen|. This function performs QUIC handshake + * as well. + * + * |destlen| should be at least + * :member:`ngtcp2_settings.max_tx_udp_payload_size`. + * + * For |path| and |pi| parameters, refer to + * `ngtcp2_conn_writev_stream`. + * + * If the given data is written to the buffer, nonzero value is + * assigned to |*paccepted| if it is not NULL. The data in DATAGRAM + * frame cannot be fragmented; writing partial data is not possible. + * + * |dgram_id| is an opaque identifier which should uniquely identify + * the given DATAGRAM. It is passed to :type:`ngtcp2_ack_datagram` + * callback when a packet that contains DATAGRAM frame is + * acknowledged. It is passed to :type:`ngtcp2_lost_datagram` + * callback when a packet that contains DATAGRAM frame is declared + * lost. If an application uses neither of those callbacks, it can + * sets 0 to this parameter. + * + * This function might write other frames other than DATAGRAM, just + * like `ngtcp2_conn_writev_stream`. + * + * If the function returns 0, it means that no more data cannot be + * sent because of congestion control limit; or, data does not fit + * into the provided buffer; or, a local endpoint, as a server, is + * unable to send data because of its amplification limit. In this + * case, |*paccepted| is assigned zero if it is not NULL. + * + * If :macro:`NGTCP2_WRITE_DATAGRAM_FLAG_MORE` is set in |flags|, + * there are 3 outcomes: + * + * - The function returns the written length of packet just like + * without :macro:`NGTCP2_WRITE_DATAGRAM_FLAG_MORE`. This is + * because packet is nearly full and the library decided to make a + * complete packet. |*paccepted| might be zero or nonzero. + * + * - The function returns :macro:`NGTCP2_ERR_WRITE_MORE`. In this + * case, |*paccepted| != 0 is asserted. This indicates that + * application can call this function with another unreliable data + * (or `ngtcp2_conn_writev_stream` if it has stream data to send) to + * pack them into the same packet. Application has to specify the + * same |conn|, |path|, |pi|, |dest|, |destlen|, and |ts| + * parameters, otherwise the behaviour is undefined. The + * application can change |flags|. + * + * - The other error might be returned just like without + * :macro:`NGTCP2_WRITE_DATAGRAM_FLAG_MORE`. + * + * When application sees :macro:`NGTCP2_ERR_WRITE_MORE`, it must not + * call other ngtcp2 API functions (application can still call + * `ngtcp2_conn_write_connection_close` to handle error from this + * function). Just keep calling `ngtcp2_conn_writev_datagram`, + * `ngtcp2_conn_writev_stream` or `ngtcp2_conn_write_pkt` until it + * returns a positive number (which indicates a complete packet is + * ready). + * + * This function returns the number of bytes written in |dest| if it + * succeeds, or one of the following negative error codes: + * + * :macro:`NGTCP2_ERR_NOMEM` + * Out of memory + * :macro:`NGTCP2_ERR_PKT_NUM_EXHAUSTED` + * Packet number is exhausted, and cannot send any more packet. + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` + * User callback failed + * :macro:`NGTCP2_ERR_WRITE_MORE` + * (Only when :macro:`NGTCP2_WRITE_DATAGRAM_FLAG_MORE` is + * specified) Application can call this function to pack more data + * into the same packet. See above to know how it works. + * :macro:`NGTCP2_ERR_INVALID_STATE` + * A remote endpoint did not express the DATAGRAM frame support. + * :macro:`NGTCP2_ERR_INVALID_ARGUMENT` + * The provisional DATAGRAM frame size exceeds the maximum + * DATAGRAM frame size that a remote endpoint can receive. + * + * In general, if the error code which satisfies + * `ngtcp2_err_is_fatal(err) <ngtcp2_err_is_fatal>` != 0 is returned, + * the application should just close the connection by calling + * `ngtcp2_conn_write_connection_close` or just delete the QUIC + * connection using `ngtcp2_conn_del`. It is undefined to call the + * other library functions. + */ +NGTCP2_EXTERN ngtcp2_ssize ngtcp2_conn_writev_datagram_versioned( + ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version, + ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, int *paccepted, + uint32_t flags, uint64_t dgram_id, const ngtcp2_vec *datav, size_t datavcnt, + ngtcp2_tstamp ts); + +/** + * @function + * + * `ngtcp2_conn_is_in_closing_period` returns nonzero if |conn| is in + * the closing period. + */ +NGTCP2_EXTERN int ngtcp2_conn_is_in_closing_period(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_is_in_draining_period` returns nonzero if |conn| is in + * the draining period. + */ +NGTCP2_EXTERN int ngtcp2_conn_is_in_draining_period(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_extend_max_stream_offset` extends stream's max stream + * data value by |datalen|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * :macro:`NGTCP2_ERR_NOMEM` + * Out of memory. + */ +NGTCP2_EXTERN int ngtcp2_conn_extend_max_stream_offset(ngtcp2_conn *conn, + int64_t stream_id, + uint64_t datalen); + +/** + * @function + * + * `ngtcp2_conn_extend_max_offset` extends max data offset by + * |datalen|. + */ +NGTCP2_EXTERN void ngtcp2_conn_extend_max_offset(ngtcp2_conn *conn, + uint64_t datalen); + +/** + * @function + * + * `ngtcp2_conn_extend_max_streams_bidi` extends the number of maximum + * local bidirectional streams that a remote endpoint can open by |n|. + * + * The library does not increase maximum stream limit automatically. + * The exception is when a stream is closed without + * :type:`ngtcp2_stream_open` callback being called. In this case, + * stream limit is increased automatically. + */ +NGTCP2_EXTERN void ngtcp2_conn_extend_max_streams_bidi(ngtcp2_conn *conn, + size_t n); + +/** + * @function + * + * `ngtcp2_conn_extend_max_streams_uni` extends the number of maximum + * local unidirectional streams that a remote endpoint can open by + * |n|. + * + * The library does not increase maximum stream limit automatically. + * The exception is when a stream is closed without + * :type:`ngtcp2_stream_open` callback being called. In this case, + * stream limit is increased automatically. + */ +NGTCP2_EXTERN void ngtcp2_conn_extend_max_streams_uni(ngtcp2_conn *conn, + size_t n); + +/** + * @function + * + * `ngtcp2_conn_get_dcid` returns the non-NULL pointer to destination + * connection ID. If no destination connection ID is present, the + * return value is not ``NULL``, and its datalen field is 0. + */ +NGTCP2_EXTERN const ngtcp2_cid *ngtcp2_conn_get_dcid(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_get_client_initial_dcid` returns the non-NULL pointer + * to the Destination Connection ID that client sent in its Initial + * packet. + */ +NGTCP2_EXTERN const ngtcp2_cid * +ngtcp2_conn_get_client_initial_dcid(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_get_num_scid` returns the number of source connection + * IDs which the local endpoint has provided to the peer and have not + * retired. + */ +NGTCP2_EXTERN size_t ngtcp2_conn_get_num_scid(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_get_scid` writes the all source connection IDs which + * the local endpoint has provided to the peer and have not retired in + * |dest|. The buffer pointed by |dest| must have + * ``sizeof(ngtcp2_cid) * n`` bytes available, where n is the return + * value of `ngtcp2_conn_get_num_scid()`. + */ +NGTCP2_EXTERN size_t ngtcp2_conn_get_scid(ngtcp2_conn *conn, ngtcp2_cid *dest); + +/** + * @function + * + * `ngtcp2_conn_get_num_active_dcid` returns the number of the active + * destination connection ID. + */ +NGTCP2_EXTERN size_t ngtcp2_conn_get_num_active_dcid(ngtcp2_conn *conn); + +/** + * @struct + * + * :type:`ngtcp2_cid_token` is the convenient struct to store + * Connection ID, its associated path, and stateless reset token. + */ +typedef struct ngtcp2_cid_token { + /** + * :member:`seq` is the sequence number of this Connection ID. + */ + uint64_t seq; + /** + * :member:`cid` is Connection ID. + */ + ngtcp2_cid cid; + /** + * :member:`ps` is the path which is associated to this Connection + * ID. + */ + ngtcp2_path_storage ps; + /** + * :member:`token` is the stateless reset token for this Connection + * ID. + */ + uint8_t token[NGTCP2_STATELESS_RESET_TOKENLEN]; + /** + * :member:`token_present` is nonzero if token contains stateless + * reset token. + */ + uint8_t token_present; +} ngtcp2_cid_token; + +/** + * @function + * + * `ngtcp2_conn_get_active_dcid` writes the all active destination + * connection IDs and tokens to |dest|. The buffer pointed by |dest| + * must have ``sizeof(ngtcp2_cid_token) * n`` bytes available, where n + * is the return value of `ngtcp2_conn_get_num_active_dcid()`. + */ +NGTCP2_EXTERN size_t ngtcp2_conn_get_active_dcid(ngtcp2_conn *conn, + ngtcp2_cid_token *dest); + +/** + * @function + * + * `ngtcp2_conn_get_client_chosen_version` returns the client chosen + * version. + */ +NGTCP2_EXTERN uint32_t ngtcp2_conn_get_client_chosen_version(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_get_negotiated_version` returns the negotiated version. + * + * Until the version is negotiated, this function returns 0. + */ +NGTCP2_EXTERN uint32_t ngtcp2_conn_get_negotiated_version(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_early_data_rejected` tells |conn| that early data was + * rejected by a server, or client decided not to attempt early data + * for some reason. |conn| discards the following connection states: + * + * - Any opended streams. + * - Stream identifier allocations. + * - Max data extended by `ngtcp2_conn_extend_max_offset`. + * - Max bidi streams extended by `ngtcp2_conn_extend_max_streams_bidi`. + * - Max uni streams extended by `ngtcp2_conn_extend_max_streams_uni`. + * + * Application which wishes to retransmit early data, it has to open + * streams and send stream data again. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` + * User callback failed + */ +NGTCP2_EXTERN int ngtcp2_conn_early_data_rejected(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_get_early_data_rejected` returns nonzero if + * `ngtcp2_conn_early_data_rejected` has been called. + */ +NGTCP2_EXTERN int ngtcp2_conn_get_early_data_rejected(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_get_conn_stat` assigns connection statistics data to + * |*cstat|. + */ +NGTCP2_EXTERN void ngtcp2_conn_get_conn_stat_versioned(ngtcp2_conn *conn, + int conn_stat_version, + ngtcp2_conn_stat *cstat); + +/** + * @function + * + * `ngtcp2_conn_submit_crypto_data` submits crypto stream data |data| + * of length |datalen| to the library for transmission. The + * encryption level is given in |crypto_level|. + * + * The library makes a copy of the buffer pointed by |data| of length + * |datalen|. Application can discard |data|. + */ +NGTCP2_EXTERN int +ngtcp2_conn_submit_crypto_data(ngtcp2_conn *conn, + ngtcp2_crypto_level crypto_level, + const uint8_t *data, const size_t datalen); + +/** + * @function + * + * `ngtcp2_conn_submit_new_token` submits address validation token. + * It is sent in NEW_TOKEN frame. Only server can call this function. + * |tokenlen| must not be 0. + * + * This function makes a copy of the buffer pointed by |token| of + * length |tokenlen|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * :macro:`NGTCP2_ERR_NOMEM` + * Out of memory. + */ +NGTCP2_EXTERN int ngtcp2_conn_submit_new_token(ngtcp2_conn *conn, + const uint8_t *token, + size_t tokenlen); + +/** + * @function + * + * `ngtcp2_conn_set_local_addr` sets local endpoint address |addr| to + * the current path of |conn|. + */ +NGTCP2_EXTERN void ngtcp2_conn_set_local_addr(ngtcp2_conn *conn, + const ngtcp2_addr *addr); + +/** + * @function + * + * `ngtcp2_conn_set_path_user_data` sets the |path_user_data| to the + * current path (see :member:`ngtcp2_path.user_data`). + */ +NGTCP2_EXTERN void ngtcp2_conn_set_path_user_data(ngtcp2_conn *conn, + void *path_user_data); + +/** + * @function + * + * `ngtcp2_conn_get_path` returns the current path. + */ +NGTCP2_EXTERN const ngtcp2_path *ngtcp2_conn_get_path(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_get_max_tx_udp_payload_size` returns the maximum UDP + * payload size that this local endpoint would send. This is the + * value of :member:`ngtcp2_settings.max_tx_udp_payload_size` that is + * passed to `ngtcp2_conn_client_new` or `ngtcp2_conn_server_new`. + */ +NGTCP2_EXTERN size_t ngtcp2_conn_get_max_tx_udp_payload_size(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_get_path_max_tx_udp_payload_size` returns the maximum + * UDP payload size for the current path. If + * :member:`ngtcp2_settings.no_tx_udp_payload_size_shaping` is set to + * nonzero, this function is equivalent to + * `ngtcp2_conn_get_max_tx_udp_payload_size`. Otherwise, it returns + * the maximum UDP payload size that is probed for the current path. + */ +NGTCP2_EXTERN size_t +ngtcp2_conn_get_path_max_tx_udp_payload_size(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_initiate_immediate_migration` starts connection + * migration to the given |path|. Only client can initiate migration. + * This function does immediate migration; while the path validation + * is nonetheless performed, this function does not wait for it to + * succeed. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * :macro:`NGTCP2_ERR_INVALID_STATE` + * Migration is disabled; or handshake is not yet confirmed; or + * client is migrating to server's preferred address. + * :macro:`NGTCP2_ERR_CONN_ID_BLOCKED` + * No unused connection ID is available. + * :macro:`NGTCP2_ERR_INVALID_ARGUMENT` + * |local_addr| equals the current local address. + * :macro:`NGTCP2_ERR_NOMEM` + * Out of memory + */ +NGTCP2_EXTERN int ngtcp2_conn_initiate_immediate_migration( + ngtcp2_conn *conn, const ngtcp2_path *path, ngtcp2_tstamp ts); + +/** + * @function + * + * `ngtcp2_conn_initiate_migration` starts connection migration to the + * given |path|. Only client can initiate migration. Unlike + * `ngtcp2_conn_initiate_immediate_migration`, this function starts a + * path validation with a new path and migrate to the new path after + * successful path validation. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * :macro:`NGTCP2_ERR_INVALID_STATE` + * Migration is disabled; or handshake is not yet confirmed; or + * client is migrating to server's preferred address. + * :macro:`NGTCP2_ERR_CONN_ID_BLOCKED` + * No unused connection ID is available. + * :macro:`NGTCP2_ERR_INVALID_ARGUMENT` + * |local_addr| equals the current local address. + * :macro:`NGTCP2_ERR_NOMEM` + * Out of memory + */ +NGTCP2_EXTERN int ngtcp2_conn_initiate_migration(ngtcp2_conn *conn, + const ngtcp2_path *path, + ngtcp2_tstamp ts); + +/** + * @function + * + * `ngtcp2_conn_get_max_local_streams_uni` returns the cumulative + * number of streams which local endpoint can open. + */ +NGTCP2_EXTERN uint64_t ngtcp2_conn_get_max_local_streams_uni(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_get_max_data_left` returns the number of bytes that + * this local endpoint can send in this connection. + */ +NGTCP2_EXTERN uint64_t ngtcp2_conn_get_max_data_left(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_get_max_stream_data_left` returns the number of bytes + * that this local endpoint can send to a stream identified by + * |stream_id|. If no such stream is found, this function returns 0. + */ +NGTCP2_EXTERN uint64_t ngtcp2_conn_get_max_stream_data_left(ngtcp2_conn *conn, + int64_t stream_id); + +/** + * @function + * + * `ngtcp2_conn_get_streams_bidi_left` returns the number of + * bidirectional streams which the local endpoint can open without + * violating stream concurrency limit. + */ +NGTCP2_EXTERN uint64_t ngtcp2_conn_get_streams_bidi_left(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_get_streams_uni_left` returns the number of + * unidirectional streams which the local endpoint can open without + * violating stream concurrency limit. + */ +NGTCP2_EXTERN uint64_t ngtcp2_conn_get_streams_uni_left(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_get_cwnd_left` returns the cwnd minus the number of + * bytes in flight on the current path. If the former is smaller than + * the latter, this function returns 0. + */ +NGTCP2_EXTERN uint64_t ngtcp2_conn_get_cwnd_left(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_set_initial_crypto_ctx` sets |ctx| for Initial packet + * encryption. The passed data will be passed to + * :type:`ngtcp2_encrypt`, :type:`ngtcp2_decrypt` and + * :type:`ngtcp2_hp_mask` callbacks. + */ +NGTCP2_EXTERN void +ngtcp2_conn_set_initial_crypto_ctx(ngtcp2_conn *conn, + const ngtcp2_crypto_ctx *ctx); + +/** + * @function + * + * `ngtcp2_conn_get_initial_crypto_ctx` returns + * :type:`ngtcp2_crypto_ctx` object for Initial packet encryption. + */ +NGTCP2_EXTERN const ngtcp2_crypto_ctx * +ngtcp2_conn_get_initial_crypto_ctx(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_set_crypto_ctx` sets |ctx| for Handshake/1RTT packet + * encryption. The passed data will be passed to + * :type:`ngtcp2_encrypt`, :type:`ngtcp2_decrypt` and + * :type:`ngtcp2_hp_mask` callbacks. + */ +NGTCP2_EXTERN void ngtcp2_conn_set_crypto_ctx(ngtcp2_conn *conn, + const ngtcp2_crypto_ctx *ctx); + +/** + * @function + * + * `ngtcp2_conn_get_tls_native_handle` returns TLS native handle set by + * `ngtcp2_conn_set_tls_native_handle()`. + */ +NGTCP2_EXTERN void *ngtcp2_conn_get_tls_native_handle(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_set_tls_native_handle` sets TLS native handle + * |tls_native_handle| to |conn|. Internally, it is used as an opaque + * pointer. + */ +NGTCP2_EXTERN void ngtcp2_conn_set_tls_native_handle(ngtcp2_conn *conn, + void *tls_native_handle); + +/** + * @function + * + * `ngtcp2_conn_set_retry_aead` sets |aead| and |aead_ctx| for Retry + * integrity tag verification. |aead| must be AEAD_AES_128_GCM. + * |aead_ctx| must be initialized with :macro:`NGTCP2_RETRY_KEY` as + * encryption key. This function must be called if |conn| is + * initialized as client. Server does not verify the tag and has no + * need to call this function. + * + * If this function succeeds, |conn| takes ownership of |aead_ctx|. + * :type:`ngtcp2_delete_crypto_aead_ctx` will be called to delete this + * object when it is no longer used. If this function fails, the + * caller is responsible to delete it. + */ +NGTCP2_EXTERN void +ngtcp2_conn_set_retry_aead(ngtcp2_conn *conn, const ngtcp2_crypto_aead *aead, + const ngtcp2_crypto_aead_ctx *aead_ctx); + +/** + * @function + * + * `ngtcp2_conn_get_crypto_ctx` returns :type:`ngtcp2_crypto_ctx` + * object for Handshake/1RTT packet encryption. + */ +NGTCP2_EXTERN const ngtcp2_crypto_ctx * +ngtcp2_conn_get_crypto_ctx(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_set_early_crypto_ctx` sets |ctx| for 0RTT packet + * encryption. The passed data will be passed to + * :type:`ngtcp2_encrypt`, :type:`ngtcp2_decrypt` and + * :type:`ngtcp2_hp_mask` callbacks. + */ +NGTCP2_EXTERN void +ngtcp2_conn_set_early_crypto_ctx(ngtcp2_conn *conn, + const ngtcp2_crypto_ctx *ctx); + +/** + * @function + * + * `ngtcp2_conn_get_early_crypto_ctx` returns + * :type:`ngtcp2_crypto_ctx` object for 0RTT packet encryption. + */ +NGTCP2_EXTERN const ngtcp2_crypto_ctx * +ngtcp2_conn_get_early_crypto_ctx(ngtcp2_conn *conn); + +/** + * @enum + * + * :type:`ngtcp2_connection_close_error_code_type` defines connection + * error code type. + */ +typedef enum ngtcp2_connection_close_error_code_type { + /** + * :enum:`NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT` + * indicates the error code is QUIC transport error code. + */ + NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT, + /** + * :enum:`NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_APPLICATION` + * indicates the error code is application error code. + */ + NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_APPLICATION, + /** + * :enum:`NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT_VERSION_NEGOTIATION` + * is a special case of QUIC transport error, and it indicates that + * client receives Version Negotiation packet. + */ + NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT_VERSION_NEGOTIATION, + /** + * :enum:`NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT_IDLE_CLOSE` + * is a special case of QUIC transport error, and it indicates that + * connection is closed because of idle timeout. + */ + NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT_IDLE_CLOSE +} ngtcp2_connection_close_error_code_type; + +/** + * @struct + * + * :type:`ngtcp2_connection_close_error` contains connection + * error code, its type, and the optional reason phrase. + */ +typedef struct ngtcp2_connection_close_error { + /** + * :member:`type` is the type of :member:`error_code`. + */ + ngtcp2_connection_close_error_code_type type; + /** + * :member:`error_code` is the error code for connection closure. + */ + uint64_t error_code; + /** + * :member:`frame_type` is the type of QUIC frame which triggers + * this connection error. This field is set to 0 if the frame type + * is unknown. + */ + uint64_t frame_type; + /** + * :member:`reason` points to the buffer which contains a reason + * phrase. It may be NULL if there is no reason phrase. If it is + * received from a remote endpoint, it is truncated to at most 1024 + * bytes. + */ + const uint8_t *reason; + /** + * :member:`reasonlen` is the length of data pointed by + * :member:`reason`. + */ + size_t reasonlen; +} ngtcp2_connection_close_error; + +/** + * @function + * + * `ngtcp2_connection_close_error_default` initializes |ccerr| with + * the default values. It sets the following fields: + * + * - :member:`type <ngtcp2_connection_close_error.type>` = + * :enum:`ngtcp2_connection_close_error_code_type.NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT` + * - :member:`error_code <ngtcp2_connection_close_error.error_code>` = + * :macro:`NGTCP2_NO_ERROR`. + * - :member:`frame_type <ngtcp2_connection_close_error.frame_type>` = + * 0 + * - :member:`reason <ngtcp2_connection_close_error.reason>` = NULL + * - :member:`reasonlen <ngtcp2_connection_close_error.reasonlen>` = 0 + */ +NGTCP2_EXTERN void +ngtcp2_connection_close_error_default(ngtcp2_connection_close_error *ccerr); + +/** + * @function + * + * `ngtcp2_connection_close_error_set_transport_error` sets + * :member:`ccerr->type <ngtcp2_connection_close_error.type>` to + * :enum:`ngtcp2_connection_close_error_code_type.NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT`, + * and :member:`ccerr->error_code + * <ngtcp2_connection_close_error.error_code>` to |error_code|. + * |reason| is the reason phrase of length |reasonlen|. This function + * does not make a copy of the reason phrase. + */ +NGTCP2_EXTERN void ngtcp2_connection_close_error_set_transport_error( + ngtcp2_connection_close_error *ccerr, uint64_t error_code, + const uint8_t *reason, size_t reasonlen); + +/** + * @function + * + * `ngtcp2_connection_close_error_set_transport_error_liberr` sets + * type and error_code based on |liberr|. + * + * If |liberr| is :macro:`NGTCP2_ERR_RECV_VERSION_NEGOTIATION`, + * :member:`ccerr->type <ngtcp2_connection_close_error.type>` is set + * to + * :enum:`ngtcp2_connection_close_error_code_type.NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT_VERSION_NEGOTIATION`, + * and :member:`ccerr->error_code + * <ngtcp2_connection_close_error.error_code>` to + * :macro:`NGTCP2_NO_ERROR`. If |liberr| is + * :macro:`NGTCP2_ERR_IDLE_CLOSE`, :member:`ccerr->type + * <ngtcp2_connection_close_error.type>` is set to + * :enum:`ngtcp2_connection_close_error_code_type.NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT_IDLE_CLOSE`, + * and :member:`ccerr->error_code + * <ngtcp2_connection_close_error.error_code>` to + * :macro:`NGTCP2_NO_ERROR`. Otherwise, :member:`ccerr->type + * <ngtcp2_connection_close_error.type>` is set to + * :enum:`ngtcp2_connection_close_error_code_type.NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT`, + * and :member:`ccerr->error_code + * <ngtcp2_connection_close_error.error_code>` is set to an error code + * inferred by |liberr| (see + * `ngtcp2_err_infer_quic_transport_error_code`). |reason| is the + * reason phrase of length |reasonlen|. This function does not make a + * copy of the reason phrase. + */ +NGTCP2_EXTERN void ngtcp2_connection_close_error_set_transport_error_liberr( + ngtcp2_connection_close_error *ccerr, int liberr, const uint8_t *reason, + size_t reasonlen); + +/** + * @function + * + * `ngtcp2_connection_close_error_set_transport_error_tls_alert` sets + * :member:`ccerr->type <ngtcp2_connection_close_error.type>` to + * :enum:`ngtcp2_connection_close_error_code_type.NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT`, + * and :member:`ccerr->error_code + * <ngtcp2_connection_close_error.error_code>` to bitwise-OR of + * :macro:`NGTCP2_CRYPTO_ERROR` and |tls_alert|. |reason| is the + * reason phrase of length |reasonlen|. This function does not make a + * copy of the reason phrase. + */ +NGTCP2_EXTERN void ngtcp2_connection_close_error_set_transport_error_tls_alert( + ngtcp2_connection_close_error *ccerr, uint8_t tls_alert, + const uint8_t *reason, size_t reasonlen); + +/** + * @function + * + * `ngtcp2_connection_close_error_set_application_error` sets + * :member:`ccerr->type <ngtcp2_connection_close_error.type>` to + * :enum:`ngtcp2_connection_close_error_code_type.NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_APPLICATION`, + * and :member:`ccerr->error_code + * <ngtcp2_connection_close_error.error_code>` to |error_code|. + * |reason| is the reason phrase of length |reasonlen|. This function + * does not make a copy of the reason phrase. + */ +NGTCP2_EXTERN void ngtcp2_connection_close_error_set_application_error( + ngtcp2_connection_close_error *ccerr, uint64_t error_code, + const uint8_t *reason, size_t reasonlen); + +/** + * @function + * + * `ngtcp2_conn_write_connection_close` writes a packet which contains + * CONNECTION_CLOSE frame(s) (type 0x1c or 0x1d) in the buffer pointed + * by |dest| whose capacity is |destlen|. + * + * For client, |destlen| should be at least + * :macro:`NGTCP2_MAX_UDP_PAYLOAD_SIZE`. + * + * If |path| is not ``NULL``, this function stores the network path + * with which the packet should be sent. Each addr field must point + * to the buffer which should be at least ``sizeof(struct + * sockaddr_storage)`` bytes long. The assignment might not be done + * if nothing is written to |dest|. + * + * If |pi| is not ``NULL``, this function stores packet metadata in it + * if it succeeds. The metadata includes ECN markings. + * + * If :member:`ccerr->type <ngtcp2_connection_close_error.type>` == + * :enum:`ngtcp2_connection_close_error_code_type.NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT`, + * this function sends CONNECTION_CLOSE (type 0x1c) frame. If + * :member:`ccerr->type <ngtcp2_connection_close_error.type>` == + * :enum:`ngtcp2_connection_close_error_code_type.NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_APPLICATION`, + * it sends CONNECTION_CLOSE (type 0x1d) frame. Otherwise, it does + * not produce any data, and returns 0. + * + * This function must not be called from inside the callback + * functions. + * + * At the moment, successful call to this function makes connection + * close. We may change this behaviour in the future to allow + * graceful shutdown. + * + * This function returns the number of bytes written in |dest| if it + * succeeds, or one of the following negative error codes: + * + * :macro:`NGTCP2_ERR_NOMEM` + * Out of memory + * :macro:`NGTCP2_ERR_NOBUF` + * Buffer is too small + * :macro:`NGTCP2_ERR_INVALID_STATE` + * The current state does not allow sending CONNECTION_CLOSE. + * :macro:`NGTCP2_ERR_PKT_NUM_EXHAUSTED` + * Packet number is exhausted, and cannot send any more packet. + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` + * User callback failed + */ +NGTCP2_EXTERN ngtcp2_ssize ngtcp2_conn_write_connection_close_versioned( + ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version, + ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, + const ngtcp2_connection_close_error *ccerr, ngtcp2_tstamp ts); + +/** + * @function + * + * `ngtcp2_conn_get_connection_close_error` stores the received + * connection close error in |ccerr|. + */ +NGTCP2_EXTERN void +ngtcp2_conn_get_connection_close_error(ngtcp2_conn *conn, + ngtcp2_connection_close_error *ccerr); + +/** + * @function + * + * `ngtcp2_conn_is_local_stream` returns nonzero if |stream_id| denotes the + * stream which a local endpoint issues. + */ +NGTCP2_EXTERN int ngtcp2_conn_is_local_stream(ngtcp2_conn *conn, + int64_t stream_id); + +/** + * @function + * + * `ngtcp2_conn_is_server` returns nonzero if |conn| is initialized as + * server. + */ +NGTCP2_EXTERN int ngtcp2_conn_is_server(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_after_retry` returns nonzero if |conn| as a client has + * received Retry packet from server and successfully validated it. + */ +NGTCP2_EXTERN int ngtcp2_conn_after_retry(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_set_stream_user_data` sets |stream_user_data| to the + * stream identified by |stream_id|. + * + * This function returns 0 if it succeeds, or one of the following + * negative error codes: + * + * :macro:`NGTCP2_ERR_STREAM_NOT_FOUND` + * Stream does not exist + */ +NGTCP2_EXTERN int ngtcp2_conn_set_stream_user_data(ngtcp2_conn *conn, + int64_t stream_id, + void *stream_user_data); + +/** + * @function + * + * `ngtcp2_conn_update_pkt_tx_time` sets the time instant of the next + * packet transmission. This function must be called after (multiple + * invocation of) `ngtcp2_conn_writev_stream`. If packet aggregation + * (e.g., packet batching, GSO) is used, call this function after all + * aggregated datagrams are sent, which indicates multiple invocation + * of `ngtcp2_conn_writev_stream`. + */ +NGTCP2_EXTERN void ngtcp2_conn_update_pkt_tx_time(ngtcp2_conn *conn, + ngtcp2_tstamp ts); + +/** + * @function + * + * `ngtcp2_conn_get_send_quantum` returns the maximum number of bytes + * that can be sent in one go without packet spacing. + */ +NGTCP2_EXTERN size_t ngtcp2_conn_get_send_quantum(ngtcp2_conn *conn); + +/** + * @function + * + * `ngtcp2_conn_get_stream_loss_count` returns the number of packets + * that contain STREAM frame for a stream identified by |stream_id| + * and are declared to be lost. The number may include the spurious + * losses. If no stream identified by |stream_id| is found, this + * function returns 0. + */ +NGTCP2_EXTERN size_t ngtcp2_conn_get_stream_loss_count(ngtcp2_conn *conn, + int64_t stream_id); + +/** + * @function + * + * `ngtcp2_strerror` returns the text representation of |liberr|. + * |liberr| must be one of ngtcp2 library error codes (which is + * defined as NGTCP2_ERR_* macro, such as + * :macro:`NGTCP2_ERR_DECRYPT`). + */ +NGTCP2_EXTERN const char *ngtcp2_strerror(int liberr); + +/** + * @function + * + * `ngtcp2_err_is_fatal` returns nonzero if |liberr| is a fatal error. + * |liberr| must be one of ngtcp2 library error codes (which is + * defined as NGTCP2_ERR_* macro, such as + * :macro:`NGTCP2_ERR_DECRYPT`). + */ +NGTCP2_EXTERN int ngtcp2_err_is_fatal(int liberr); + +/** + * @function + * + * `ngtcp2_err_infer_quic_transport_error_code` returns a QUIC + * transport error code which corresponds to |liberr|. |liberr| must + * be one of ngtcp2 library error codes (which is defined as + * NGTCP2_ERR_* macro, such as :macro:`NGTCP2_ERR_DECRYPT`). + */ +NGTCP2_EXTERN uint64_t ngtcp2_err_infer_quic_transport_error_code(int liberr); + +/** + * @function + * + * `ngtcp2_addr_init` initializes |dest| with the given arguments and + * returns |dest|. + */ +NGTCP2_EXTERN ngtcp2_addr *ngtcp2_addr_init(ngtcp2_addr *dest, + const ngtcp2_sockaddr *addr, + ngtcp2_socklen addrlen); + +/** + * @function + * + * `ngtcp2_addr_copy_byte` copies |addr| of length |addrlen| into the + * buffer pointed by :member:`dest->addr <ngtcp2_addr.addr>`. + * :member:`dest->addrlen <ngtcp2_addr.addrlen>` is updated to have + * |addrlen|. This function assumes that :member:`dest->addr + * <ngtcp2_addr.addr>` points to a buffer which has a sufficient + * capacity to store the copy. + */ +NGTCP2_EXTERN void ngtcp2_addr_copy_byte(ngtcp2_addr *dest, + const ngtcp2_sockaddr *addr, + ngtcp2_socklen addrlen); + +/** + * @function + * + * `ngtcp2_path_storage_init` initializes |ps| with the given + * arguments. This function copies |local_addr| and |remote_addr|. + */ +NGTCP2_EXTERN void ngtcp2_path_storage_init(ngtcp2_path_storage *ps, + const ngtcp2_sockaddr *local_addr, + ngtcp2_socklen local_addrlen, + const ngtcp2_sockaddr *remote_addr, + ngtcp2_socklen remote_addrlen, + void *user_data); + +/** + * @function + * + * `ngtcp2_path_storage_zero` initializes |ps| with the zero length + * addresses. + */ +NGTCP2_EXTERN void ngtcp2_path_storage_zero(ngtcp2_path_storage *ps); + +/** + * @function + * + * `ngtcp2_settings_default` initializes |settings| with the default + * values. First this function fills |settings| with 0 and set the + * default value to the following fields: + * + * * :type:`cc_algo <ngtcp2_settings.cc_algo>` = + * :enum:`ngtcp2_cc_algo.NGTCP2_CC_ALGO_CUBIC` + * * :type:`initial_rtt <ngtcp2_settings.initial_rtt>` = + * :macro:`NGTCP2_DEFAULT_INITIAL_RTT` + * * :type:`ack_thresh <ngtcp2_settings.ack_thresh>` = 2 + * * :type:`max_tx_udp_payload_size + * <ngtcp2_settings.max_tx_udp_payload_size>` = 1452 + * * :type:`handshake_timeout <ngtcp2_settings.handshake_timeout>` = + * :macro:`NGTCP2_DEFAULT_HANDSHAKE_TIMEOUT`. + */ +NGTCP2_EXTERN void ngtcp2_settings_default_versioned(int settings_version, + ngtcp2_settings *settings); + +/** + * @function + * + * `ngtcp2_transport_params_default` initializes |params| with the + * default values. First this function fills |params| with 0 and set + * the default value to the following fields: + * + * * :type:`max_udp_payload_size + * <ngtcp2_transport_params.max_udp_payload_size>` = + * :macro:`NGTCP2_DEFAULT_MAX_RECV_UDP_PAYLOAD_SIZE` + * * :type:`ack_delay_exponent + * <ngtcp2_transport_params.ack_delay_exponent>` = + * :macro:`NGTCP2_DEFAULT_ACK_DELAY_EXPONENT` + * * :type:`max_ack_delay <ngtcp2_transport_params.max_ack_delay>` = + * :macro:`NGTCP2_DEFAULT_MAX_ACK_DELAY` + * * :type:`active_connection_id_limit + * <ngtcp2_transport_params.active_connection_id_limit>` = + * :macro:`NGTCP2_DEFAULT_ACTIVE_CONNECTION_ID_LIMIT` + */ +NGTCP2_EXTERN void +ngtcp2_transport_params_default_versioned(int transport_params_version, + ngtcp2_transport_params *params); + +/** + * @function + * + * `ngtcp2_mem_default` returns the default, system standard memory + * allocator. + */ +NGTCP2_EXTERN const ngtcp2_mem *ngtcp2_mem_default(void); + +/** + * @macrosection + * + * ngtcp2_info macros + */ + +/** + * @macro + * + * :macro:`NGTCP2_VERSION_AGE` is the age of :type:`ngtcp2_info` + */ +#define NGTCP2_VERSION_AGE 1 + +/** + * @struct + * + * :type:`ngtcp2_info` is what `ngtcp2_version()` returns. It holds + * information about the particular ngtcp2 version. + */ +typedef struct ngtcp2_info { + /** + * :member:`age` is the age of this struct. This instance of ngtcp2 + * sets it to :macro:`NGTCP2_VERSION_AGE` but a future version may + * bump it and add more struct fields at the bottom + */ + int age; + /** + * :member:`version_num` is the :macro:`NGTCP2_VERSION_NUM` number + * (since age ==1) + */ + int version_num; + /** + * :member:`version_str` points to the :macro:`NGTCP2_VERSION` + * string (since age ==1) + */ + const char *version_str; + /* -------- the above fields all exist when age == 1 */ +} ngtcp2_info; + +/** + * @function + * + * `ngtcp2_version` returns a pointer to a ngtcp2_info struct with + * version information about the run-time library in use. The + * |least_version| argument can be set to a 24 bit numerical value for + * the least accepted version number and if the condition is not met, + * this function will return a ``NULL``. Pass in 0 to skip the + * version checking. + */ +NGTCP2_EXTERN const ngtcp2_info *ngtcp2_version(int least_version); + +/** + * @function + * + * `ngtcp2_is_bidi_stream` returns nonzero if |stream_id| denotes + * bidirectional stream. + */ +NGTCP2_EXTERN int ngtcp2_is_bidi_stream(int64_t stream_id); + +/** + * @function + * + * `ngtcp2_path_copy` copies |src| into |dest|. This function assumes + * that |dest| has enough buffer to store the deep copy of + * :member:`src->local <ngtcp2_path.local>` and :member:`src->remote + * <ngtcp2_path.remote>`. + */ +NGTCP2_EXTERN void ngtcp2_path_copy(ngtcp2_path *dest, const ngtcp2_path *src); + +/** + * @function + * + * `ngtcp2_path_eq` returns nonzero if |a| and |b| shares the same + * local and remote addresses. + */ +NGTCP2_EXTERN int ngtcp2_path_eq(const ngtcp2_path *a, const ngtcp2_path *b); + +/** + * @function + * + * `ngtcp2_is_supported_version` returns nonzero if the library supports + * QUIC version |version|. + */ +NGTCP2_EXTERN int ngtcp2_is_supported_version(uint32_t version); + +/* + * @function + * + * `ngtcp2_is_reserved_version` returns nonzero if |version| is a + * reserved version. + */ +NGTCP2_EXTERN int ngtcp2_is_reserved_version(uint32_t version); + +/** + * @function + * + * `ngtcp2_select_version` selects and returns a version from the + * version set |offered_versions| of |offered_versionslen| elements. + * |preferred_versions| of |preferred_versionslen| elements specifies + * the preference of versions, which is sorted in the order of + * preference. All versions included in |preferred_versions| must be + * supported by the library, that is, passing a version to + * `ngtcp2_is_supported_version` must return nonzero. This function + * is intended to be used by client when it receives Version + * Negotiation packet. If no version is selected, this function + * returns 0. + */ +NGTCP2_EXTERN uint32_t ngtcp2_select_version(const uint32_t *preferred_versions, + size_t preferred_versionslen, + const uint32_t *offered_versions, + size_t offered_versionslen); + +/* + * Versioned function wrappers + */ + +/* + * `ngtcp2_conn_read_pkt` is a wrapper around + * `ngtcp2_conn_read_pkt_versioned` to set the correct struct version. + */ +#define ngtcp2_conn_read_pkt(CONN, PATH, PI, PKT, PKTLEN, TS) \ + ngtcp2_conn_read_pkt_versioned((CONN), (PATH), NGTCP2_PKT_INFO_VERSION, \ + (PI), (PKT), (PKTLEN), (TS)) + +/* + * `ngtcp2_conn_write_pkt` is a wrapper around + * `ngtcp2_conn_write_pkt_versioned` to set the correct struct + * version. + */ +#define ngtcp2_conn_write_pkt(CONN, PATH, PI, DEST, DESTLEN, TS) \ + ngtcp2_conn_write_pkt_versioned((CONN), (PATH), NGTCP2_PKT_INFO_VERSION, \ + (PI), (DEST), (DESTLEN), (TS)) + +/* + * `ngtcp2_conn_write_stream` is a wrapper around + * `ngtcp2_conn_write_stream_versioned` to set the correct struct + * version. + */ +#define ngtcp2_conn_write_stream(CONN, PATH, PI, DEST, DESTLEN, PDATALEN, \ + FLAGS, STREAM_ID, DATA, DATALEN, TS) \ + ngtcp2_conn_write_stream_versioned( \ + (CONN), (PATH), NGTCP2_PKT_INFO_VERSION, (PI), (DEST), (DESTLEN), \ + (PDATALEN), (FLAGS), (STREAM_ID), (DATA), (DATALEN), (TS)) + +/* + * `ngtcp2_conn_writev_stream` is a wrapper around + * `ngtcp2_conn_writev_stream_versioned` to set the correct struct + * version. + */ +#define ngtcp2_conn_writev_stream(CONN, PATH, PI, DEST, DESTLEN, PDATALEN, \ + FLAGS, STREAM_ID, DATAV, DATAVCNT, TS) \ + ngtcp2_conn_writev_stream_versioned( \ + (CONN), (PATH), NGTCP2_PKT_INFO_VERSION, (PI), (DEST), (DESTLEN), \ + (PDATALEN), (FLAGS), (STREAM_ID), (DATAV), (DATAVCNT), (TS)) + +/* + * `ngtcp2_conn_writev_datagram` is a wrapper around + * `ngtcp2_conn_writev_datagram_versioned` to set the correct struct + * version. + */ +#define ngtcp2_conn_writev_datagram(CONN, PATH, PI, DEST, DESTLEN, PACCEPTED, \ + FLAGS, DGRAM_ID, DATAV, DATAVCNT, TS) \ + ngtcp2_conn_writev_datagram_versioned( \ + (CONN), (PATH), NGTCP2_PKT_INFO_VERSION, (PI), (DEST), (DESTLEN), \ + (PACCEPTED), (FLAGS), (DGRAM_ID), (DATAV), (DATAVCNT), (TS)) + +/* + * `ngtcp2_conn_write_connection_close` is a wrapper around + * `ngtcp2_conn_write_connection_close_versioned` to set the correct + * struct version. + */ +#define ngtcp2_conn_write_connection_close(CONN, PATH, PI, DEST, DESTLEN, \ + CCERR, TS) \ + ngtcp2_conn_write_connection_close_versioned( \ + (CONN), (PATH), NGTCP2_PKT_INFO_VERSION, (PI), (DEST), (DESTLEN), \ + (CCERR), (TS)) + +/* + * `ngtcp2_encode_transport_params` is a wrapper around + * `ngtcp2_encode_transport_params_versioned` to set the correct + * struct version. + */ +#define ngtcp2_encode_transport_params(DEST, DESTLEN, EXTTYPE, PARAMS) \ + ngtcp2_encode_transport_params_versioned( \ + (DEST), (DESTLEN), (EXTTYPE), NGTCP2_TRANSPORT_PARAMS_VERSION, (PARAMS)) + +/* + * `ngtcp2_decode_transport_params` is a wrapper around + * `ngtcp2_decode_transport_params_versioned` to set the correct + * struct version. + */ +#define ngtcp2_decode_transport_params(PARAMS, EXTTYPE, DATA, DATALEN) \ + ngtcp2_decode_transport_params_versioned( \ + NGTCP2_TRANSPORT_PARAMS_VERSION, (PARAMS), (EXTTYPE), (DATA), (DATALEN)) + +/* + * `ngtcp2_conn_client_new` is a wrapper around + * `ngtcp2_conn_client_new_versioned` to set the correct struct + * version. + */ +#define ngtcp2_conn_client_new(PCONN, DCID, SCID, PATH, VERSION, CALLBACKS, \ + SETTINGS, PARAMS, MEM, USER_DATA) \ + ngtcp2_conn_client_new_versioned( \ + (PCONN), (DCID), (SCID), (PATH), (VERSION), NGTCP2_CALLBACKS_VERSION, \ + (CALLBACKS), NGTCP2_SETTINGS_VERSION, (SETTINGS), \ + NGTCP2_TRANSPORT_PARAMS_VERSION, (PARAMS), (MEM), (USER_DATA)) + +/* + * `ngtcp2_conn_server_new` is a wrapper around + * `ngtcp2_conn_server_new_versioned` to set the correct struct + * version. + */ +#define ngtcp2_conn_server_new(PCONN, DCID, SCID, PATH, VERSION, CALLBACKS, \ + SETTINGS, PARAMS, MEM, USER_DATA) \ + ngtcp2_conn_server_new_versioned( \ + (PCONN), (DCID), (SCID), (PATH), (VERSION), NGTCP2_CALLBACKS_VERSION, \ + (CALLBACKS), NGTCP2_SETTINGS_VERSION, (SETTINGS), \ + NGTCP2_TRANSPORT_PARAMS_VERSION, (PARAMS), (MEM), (USER_DATA)) + +/* + * `ngtcp2_conn_set_early_remote_transport_params` is a wrapper around + * `ngtcp2_conn_set_early_remote_transport_params_versioned` to set + * the correct struct version. + */ +#define ngtcp2_conn_set_early_remote_transport_params(CONN, PARAMS) \ + ngtcp2_conn_set_early_remote_transport_params_versioned( \ + (CONN), NGTCP2_TRANSPORT_PARAMS_VERSION, (PARAMS)) + +/* + * `ngtcp2_conn_set_local_transport_params` is a wrapper around + * `ngtcp2_conn_set_local_transport_params_versioned` to set the + * correct struct version. + */ +#define ngtcp2_conn_set_local_transport_params(CONN, PARAMS) \ + ngtcp2_conn_set_local_transport_params_versioned( \ + (CONN), NGTCP2_TRANSPORT_PARAMS_VERSION, (PARAMS)) + +/* + * `ngtcp2_transport_params_default` is a wrapper around + * `ngtcp2_transport_params_default_versioned` to set the correct + * struct version. + */ +#define ngtcp2_transport_params_default(PARAMS) \ + ngtcp2_transport_params_default_versioned(NGTCP2_TRANSPORT_PARAMS_VERSION, \ + (PARAMS)) + +/* + * `ngtcp2_conn_get_conn_stat` is a wrapper around + * `ngtcp2_conn_get_conn_stat_versioned` to set the correct struct + * version. + */ +#define ngtcp2_conn_get_conn_stat(CONN, CSTAT) \ + ngtcp2_conn_get_conn_stat_versioned((CONN), NGTCP2_CONN_STAT_VERSION, (CSTAT)) + +/* + * `ngtcp2_settings_default` is a wrapper around + * `ngtcp2_settings_default_versioned` to set the correct struct + * version. + */ +#define ngtcp2_settings_default(SETTINGS) \ + ngtcp2_settings_default_versioned(NGTCP2_SETTINGS_VERSION, (SETTINGS)) + +#ifdef _MSC_VER +# pragma warning(pop) +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* NGTCP2_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/ngtcp2_crypto.h b/src/contrib/libngtcp2/ngtcp2/ngtcp2_crypto.h new file mode 100644 index 0000000..4736b51 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/ngtcp2_crypto.h @@ -0,0 +1,893 @@ +/* + * ngtcp2 + * + * Copyright (c) 2019 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_CRYPTO_H +#define NGTCP2_CRYPTO_H + +#include <ngtcp2/ngtcp2.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef WIN32 +# ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +# endif +# include <ws2tcpip.h> +#endif /* WIN32 */ + +/** + * @macro + * + * :macro:`NGTCP2_CRYPTO_INITIAL_SECRETLEN` is the length of secret + * for Initial packets. + */ +#define NGTCP2_CRYPTO_INITIAL_SECRETLEN 32 + +/** + * @macro + * + * :macro:`NGTCP2_CRYPTO_INITIAL_KEYLEN` is the length of key for + * Initial packets. + */ +#define NGTCP2_CRYPTO_INITIAL_KEYLEN 16 + +/** + * @macro + * + * :macro:`NGTCP2_CRYPTO_INITIAL_IVLEN` is the length of IV for + * Initial packets. + */ +#define NGTCP2_CRYPTO_INITIAL_IVLEN 12 + +/** + * @function + * + * `ngtcp2_crypto_ctx_tls` initializes |ctx| by extracting negotiated + * ciphers and message digests from native TLS session + * |tls_native_handle|. This is used for encrypting/decrypting + * Handshake and Short header packets. + * + * If libngtcp2_crypto_openssl is linked, |tls_native_handle| must be + * a pointer to SSL object. + */ +NGTCP2_EXTERN ngtcp2_crypto_ctx *ngtcp2_crypto_ctx_tls(ngtcp2_crypto_ctx *ctx, + void *tls_native_handle); + +/** + * @function + * + * `ngtcp2_crypto_ctx_tls_early` initializes |ctx| by extracting early + * ciphers and message digests from native TLS session + * |tls_native_handle|. This is used for encrypting/decrypting 0RTT + * packets. + * + * If libngtcp2_crypto_openssl is linked, |tls_native_handle| must be + * a pointer to SSL object. + */ +NGTCP2_EXTERN ngtcp2_crypto_ctx * +ngtcp2_crypto_ctx_tls_early(ngtcp2_crypto_ctx *ctx, void *tls_native_handle); + +/** + * @function + * + * `ngtcp2_crypto_md_init` initializes |md| with the provided + * |md_native_handle| which is an underlying message digest object. + * + * If libngtcp2_crypto_openssl is linked, |md_native_handle| must be a + * pointer to EVP_MD. + * + * If libngtcp2_crypto_gnutls is linked, |md_native_handle| must be + * gnutls_mac_algorithm_t casted to ``void *``. + * + * If libngtcp2_crypto_boringssl is linked, |md_native_handle| must be + * a pointer to EVP_MD. + */ +NGTCP2_EXTERN ngtcp2_crypto_md *ngtcp2_crypto_md_init(ngtcp2_crypto_md *md, + void *md_native_handle); + +/** + * @function + * + * `ngtcp2_crypto_md_hashlen` returns the length of |md| output. + */ +NGTCP2_EXTERN size_t ngtcp2_crypto_md_hashlen(const ngtcp2_crypto_md *md); + +/** + * @function + * + * `ngtcp2_crypto_aead_keylen` returns the length of key for |aead|. + */ +NGTCP2_EXTERN size_t ngtcp2_crypto_aead_keylen(const ngtcp2_crypto_aead *aead); + +/** + * @function + * + * `ngtcp2_crypto_aead_noncelen` returns the length of nonce for + * |aead|. + */ +NGTCP2_EXTERN size_t +ngtcp2_crypto_aead_noncelen(const ngtcp2_crypto_aead *aead); + +/** + * @function + * + * `ngtcp2_crypto_hkdf_extract` performs HKDF extract operation. The + * result is the length of |md| and is stored to the buffer pointed by + * |dest|. The caller is responsible to specify the buffer that can + * store the output. + * + * This function returns 0 if it succeeds, or -1. + */ +NGTCP2_EXTERN int +ngtcp2_crypto_hkdf_extract(uint8_t *dest, const ngtcp2_crypto_md *md, + const uint8_t *secret, size_t secretlen, + const uint8_t *salt, size_t saltlen); + +/** + * @function + * + * `ngtcp2_crypto_hkdf_expand` performs HKDF expand operation. The + * result is |destlen| bytes long and is stored to the buffer pointed + * by |dest|. + * + * This function returns 0 if it succeeds, or -1. + */ +NGTCP2_EXTERN int ngtcp2_crypto_hkdf_expand(uint8_t *dest, size_t destlen, + const ngtcp2_crypto_md *md, + const uint8_t *secret, + size_t secretlen, + const uint8_t *info, + size_t infolen); + +/** + * @function + * + * `ngtcp2_crypto_hkdf` performs HKDF operation. The result is + * |destlen| bytes long and is stored to the buffer pointed by |dest|. + * + * This function returns 0 if it succeeds, or -1. + */ +NGTCP2_EXTERN int ngtcp2_crypto_hkdf(uint8_t *dest, size_t destlen, + const ngtcp2_crypto_md *md, + const uint8_t *secret, size_t secretlen, + const uint8_t *salt, size_t saltlen, + const uint8_t *info, size_t infolen); + +/** + * @function + * + * `ngtcp2_crypto_hkdf_expand_label` performs HKDF expand label. The + * result is |destlen| bytes long and is stored to the buffer pointed + * by |dest|. + * + * This function returns 0 if it succeeds, or -1. + */ +NGTCP2_EXTERN int ngtcp2_crypto_hkdf_expand_label(uint8_t *dest, size_t destlen, + const ngtcp2_crypto_md *md, + const uint8_t *secret, + size_t secretlen, + const uint8_t *label, + size_t labellen); + +/** + * @enum + * + * :type:`ngtcp2_crypto_side` indicates which side the application + * implements; client or server. + */ +typedef enum ngtcp2_crypto_side { + /** + * :enum:`NGTCP2_CRYPTO_SIDE_CLIENT` indicates that the application + * is client. + */ + NGTCP2_CRYPTO_SIDE_CLIENT, + /** + * :enum:`NGTCP2_CRYPTO_SIDE_SERVER` indicates that the application + * is server. + */ + NGTCP2_CRYPTO_SIDE_SERVER +} ngtcp2_crypto_side; + +/** + * @function + * + * `ngtcp2_crypto_packet_protection_ivlen` returns the length of IV + * used to encrypt QUIC packet. + */ +NGTCP2_EXTERN size_t +ngtcp2_crypto_packet_protection_ivlen(const ngtcp2_crypto_aead *aead); + +/** + * @function + * + * `ngtcp2_crypto_encrypt` encrypts |plaintext| of length + * |plaintextlen| and writes the ciphertext into the buffer pointed by + * |dest|. The length of ciphertext is plaintextlen + + * :member:`aead->max_overhead <ngtcp2_crypto_aead.max_overhead>` + * bytes long. |dest| must have enough capacity to store the + * ciphertext. It is allowed to specify the same value to |dest| and + * |plaintext|. + * + * This function returns 0 if it succeeds, or -1. + */ +NGTCP2_EXTERN int ngtcp2_crypto_encrypt(uint8_t *dest, + const ngtcp2_crypto_aead *aead, + const ngtcp2_crypto_aead_ctx *aead_ctx, + const uint8_t *plaintext, + size_t plaintextlen, + const uint8_t *nonce, size_t noncelen, + const uint8_t *aad, size_t aadlen); + +/** + * @function + * + * `ngtcp2_crypto_encrypt_cb` is a wrapper function around + * `ngtcp2_crypto_encrypt`. It can be directly passed to + * :member:`ngtcp2_callbacks.encrypt` field. + * + * This function returns 0 if it succeeds, or + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE`. + */ +NGTCP2_EXTERN int +ngtcp2_crypto_encrypt_cb(uint8_t *dest, const ngtcp2_crypto_aead *aead, + const ngtcp2_crypto_aead_ctx *aead_ctx, + const uint8_t *plaintext, size_t plaintextlen, + const uint8_t *nonce, size_t noncelen, + const uint8_t *aad, size_t aadlen); + +/** + * @function + * + * `ngtcp2_crypto_decrypt` decrypts |ciphertext| of length + * |ciphertextlen| and writes the plaintext into the buffer pointed by + * |dest|. The length of plaintext is ciphertextlen - + * :member:`aead->max_overhead <ngtcp2_crypto_aead.max_overhead>` + * bytes long. |dest| must have enough capacity to store the + * plaintext. It is allowed to specify the same value to |dest| and + * |ciphertext|. + * + * This function returns 0 if it succeeds, or -1. + */ +NGTCP2_EXTERN int ngtcp2_crypto_decrypt(uint8_t *dest, + const ngtcp2_crypto_aead *aead, + const ngtcp2_crypto_aead_ctx *aead_ctx, + const uint8_t *ciphertext, + size_t ciphertextlen, + const uint8_t *nonce, size_t noncelen, + const uint8_t *aad, size_t aadlen); + +/** + * @function + * + * `ngtcp2_crypto_decrypt_cb` is a wrapper function around + * `ngtcp2_crypto_decrypt`. It can be directly passed to + * :member:`ngtcp2_callbacks.decrypt` field. + * + * This function returns 0 if it succeeds, or + * :macro:`NGTCP2_ERR_TLS_DECRYPT`. + */ +NGTCP2_EXTERN int +ngtcp2_crypto_decrypt_cb(uint8_t *dest, const ngtcp2_crypto_aead *aead, + const ngtcp2_crypto_aead_ctx *aead_ctx, + const uint8_t *ciphertext, size_t ciphertextlen, + const uint8_t *nonce, size_t noncelen, + const uint8_t *aad, size_t aadlen); + +/** + * @function + * + * `ngtcp2_crypto_hp_mask` generates mask which is used in packet + * header encryption. The mask is written to the buffer pointed by + * |dest|. The sample is passed as |sample| which is + * :macro:`NGTCP2_HP_SAMPLELEN` bytes long. The length of mask must + * be at least :macro:`NGTCP2_HP_MASKLEN`. The library only uses the + * first :macro:`NGTCP2_HP_MASKLEN` bytes of the produced mask. The + * buffer pointed by |dest| must have at least + * :macro:`NGTCP2_HP_SAMPLELEN` bytes available. + * + * This function returns 0 if it succeeds, or -1. + */ +NGTCP2_EXTERN int ngtcp2_crypto_hp_mask(uint8_t *dest, + const ngtcp2_crypto_cipher *hp, + const ngtcp2_crypto_cipher_ctx *hp_ctx, + const uint8_t *sample); + +/** + * @function + * + * `ngtcp2_crypto_hp_mask_cb` is a wrapper function around + * `ngtcp2_crypto_hp_mask`. It can be directly passed to + * :member:`ngtcp2_callbacks.hp_mask` field. + * + * This function returns 0 if it succeeds, or + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE`. + */ +NGTCP2_EXTERN int +ngtcp2_crypto_hp_mask_cb(uint8_t *dest, const ngtcp2_crypto_cipher *hp, + const ngtcp2_crypto_cipher_ctx *hp_ctx, + const uint8_t *sample); + +/** + * @function + * + * `ngtcp2_crypto_derive_and_install_rx_key` derives the rx keys from + * |secret| and installs new keys to |conn|. + * + * If |key| is not NULL, the derived packet protection key for + * decryption is written to the buffer pointed by |key|. If |iv| is + * not NULL, the derived packet protection IV for decryption is + * written to the buffer pointed by |iv|. If |hp| is not NULL, the + * derived header protection key for decryption is written to the + * buffer pointed by |hp|. + * + * |secretlen| specifies the length of |secret|. + * + * The length of packet protection key and header protection key is + * `ngtcp2_crypto_aead_keylen(ctx->aead) <ngtcp2_crypto_aead_keylen>`, + * and the length of packet protection IV is + * `ngtcp2_crypto_packet_protection_ivlen(ctx->aead) + * <ngtcp2_crypto_packet_protection_ivlen>` where ctx is obtained by + * `ngtcp2_crypto_ctx_tls` (or `ngtcp2_crypto_ctx_tls_early` if + * |level| == :enum:`ngtcp2_crypto_level.NGTCP2_CRYPTO_LEVEL_EARLY`). + * + * In the first call of this function, it calls + * `ngtcp2_conn_set_crypto_ctx` (or `ngtcp2_conn_set_early_crypto_ctx` + * if |level| == + * :enum:`ngtcp2_crypto_level.NGTCP2_CRYPTO_LEVEL_EARLY`) to set + * negotiated AEAD and message digest algorithm. After the successful + * call of this function, application can use + * `ngtcp2_conn_get_crypto_ctx` (or `ngtcp2_conn_get_early_crypto_ctx` + * if |level| == + * :enum:`ngtcp2_crypto_level.NGTCP2_CRYPTO_LEVEL_EARLY`) to get + * :type:`ngtcp2_crypto_ctx`. + * + * If |conn| is initialized as client, and |level| is + * :enum:`ngtcp2_crypto_level.NGTCP2_CRYPTO_LEVEL_APPLICATION`, this + * function retrieves a remote QUIC transport parameters extension + * from an object obtained by `ngtcp2_conn_get_tls_native_handle` and + * sets it to |conn| by calling + * `ngtcp2_conn_decode_remote_transport_params`. + * + * This function returns 0 if it succeeds, or -1. + */ +NGTCP2_EXTERN int ngtcp2_crypto_derive_and_install_rx_key( + ngtcp2_conn *conn, uint8_t *key, uint8_t *iv, uint8_t *hp, + ngtcp2_crypto_level level, const uint8_t *secret, size_t secretlen); + +/** + * @function + * + * `ngtcp2_crypto_derive_and_install_tx_key` derives the tx keys from + * |secret| and installs new keys to |conn|. + * + * If |key| is not NULL, the derived packet protection key for + * encryption is written to the buffer pointed by |key|. If |iv| is + * not NULL, the derived packet protection IV for encryption is + * written to the buffer pointed by |iv|. If |hp| is not NULL, the + * derived header protection key for encryption is written to the + * buffer pointed by |hp|. + * + * |secretlen| specifies the length of |secret|. + * + * The length of packet protection key and header protection key is + * `ngtcp2_crypto_aead_keylen(ctx->aead) <ngtcp2_crypto_aead_keylen>`, + * and the length of packet protection IV is + * `ngtcp2_crypto_packet_protection_ivlen(ctx->aead) + * <ngtcp2_crypto_packet_protection_ivlen>` where ctx is obtained by + * `ngtcp2_crypto_ctx_tls` (or `ngtcp2_crypto_ctx_tls_early` if + * |level| == :enum:`ngtcp2_crypto_level.NGTCP2_CRYPTO_LEVEL_EARLY`). + * + * In the first call of this function, it calls + * `ngtcp2_conn_set_crypto_ctx` (or `ngtcp2_conn_set_early_crypto_ctx` + * if |level| == + * :enum:`ngtcp2_crypto_level.NGTCP2_CRYPTO_LEVEL_EARLY`) to set + * negotiated AEAD and message digest algorithm. After the successful + * call of this function, application can use + * `ngtcp2_conn_get_crypto_ctx` (or `ngtcp2_conn_get_early_crypto_ctx` + * if |level| == + * :enum:`ngtcp2_crypto_level.NGTCP2_CRYPTO_LEVEL_EARLY`) to get + * :type:`ngtcp2_crypto_ctx`. + * + * If |conn| is initialized as server, and |level| is + * :enum:`ngtcp2_crypto_level.NGTCP2_CRYPTO_LEVEL_APPLICATION`, this + * function retrieves a remote QUIC transport parameters extension + * from an object obtained by `ngtcp2_conn_get_tls_native_handle` and + * sets it to |conn| by calling + * `ngtcp2_conn_decode_remote_transport_params`. + * + * This function returns 0 if it succeeds, or -1. + */ +NGTCP2_EXTERN int ngtcp2_crypto_derive_and_install_tx_key( + ngtcp2_conn *conn, uint8_t *key, uint8_t *iv, uint8_t *hp, + ngtcp2_crypto_level level, const uint8_t *secret, size_t secretlen); + +/** + * @function + * + * `ngtcp2_crypto_update_key` updates traffic keying materials. + * + * The new traffic secret for decryption is written to the buffer + * pointed by |rx_secret|. The length of secret is |secretlen| bytes, + * and |rx_secret| must point to the buffer which has enough capacity. + * + * The new traffic secret for encryption is written to the buffer + * pointed by |tx_secret|. The length of secret is |secretlen| bytes, + * and |tx_secret| must point to the buffer which has enough capacity. + * + * The derived packet protection key for decryption is written to the + * buffer pointed by |rx_key|. The derived packet protection IV for + * decryption is written to the buffer pointed by |rx_iv|. + * |rx_aead_ctx| must be constructed with |rx_key|. + * + * The derived packet protection key for encryption is written to the + * buffer pointed by |tx_key|. The derived packet protection IV for + * encryption is written to the buffer pointed by |tx_iv|. + * |tx_aead_ctx| must be constructed with |rx_key|. + * + * |current_rx_secret| and |current_tx_secret| are the current traffic + * secrets for decryption and encryption. |secretlen| specifies the + * length of |rx_secret| and |tx_secret|. + * + * The length of packet protection key and header protection key is + * `ngtcp2_crypto_aead_keylen(ctx->aead) <ngtcp2_crypto_aead_keylen>`, + * and the length of packet protection IV is + * `ngtcp2_crypto_packet_protection_ivlen(ctx->aead) + * <ngtcp2_crypto_packet_protection_ivlen>` where ctx is obtained by + * `ngtcp2_crypto_ctx_tls`. + * + * This function returns 0 if it succeeds, or -1. + */ +NGTCP2_EXTERN int ngtcp2_crypto_update_key( + ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret, + ngtcp2_crypto_aead_ctx *rx_aead_ctx, uint8_t *rx_key, uint8_t *rx_iv, + ngtcp2_crypto_aead_ctx *tx_aead_ctx, uint8_t *tx_key, uint8_t *tx_iv, + const uint8_t *current_rx_secret, const uint8_t *current_tx_secret, + size_t secretlen); + +/** + * @function + * + * `ngtcp2_crypto_update_key_cb` is a wrapper function around + * `ngtcp2_crypto_update_key`. It can be directly passed to + * :member:`ngtcp2_callbacks.update_key` field. + * + * This function returns 0 if it succeeds, or + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE`. + */ +NGTCP2_EXTERN int ngtcp2_crypto_update_key_cb( + ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret, + ngtcp2_crypto_aead_ctx *rx_aead_ctx, uint8_t *rx_iv, + ngtcp2_crypto_aead_ctx *tx_aead_ctx, uint8_t *tx_iv, + const uint8_t *current_rx_secret, const uint8_t *current_tx_secret, + size_t secretlen, void *user_data); + +/** + * @function + * + * `ngtcp2_crypto_client_initial_cb` installs initial secrets and + * encryption keys and sets QUIC transport parameters. + * + * This function can be directly passed to + * :member:`ngtcp2_callbacks.client_initial` field. It is only used + * by client. + * + * This function returns 0 if it succeeds, or + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE`. + */ +NGTCP2_EXTERN int ngtcp2_crypto_client_initial_cb(ngtcp2_conn *conn, + void *user_data); + +/** + * @function + * + * `ngtcp2_crypto_recv_retry_cb` re-installs initial secrets in + * response to incoming Retry packet. + * + * This function can be directly passed to + * :member:`ngtcp2_callbacks.recv_retry` field. It is only used + * by client. + * + * This function returns 0 if it succeeds, or + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE`. + */ +NGTCP2_EXTERN int ngtcp2_crypto_recv_retry_cb(ngtcp2_conn *conn, + const ngtcp2_pkt_hd *hd, + void *user_data); + +/** + * @function + * + * `ngtcp2_crypto_recv_client_initial_cb` installs initial secrets in + * response to an incoming Initial packet from client, and sets QUIC + * transport parameters. + * + * This function can be directly passed to + * :member:`ngtcp2_callbacks.recv_client_initial` field. It is + * only used by server. + * + * This function returns 0 if it succeeds, or + * :macro:`NGTCP2_ERR_CALLBACK_FAILURE`. + */ +NGTCP2_EXTERN int ngtcp2_crypto_recv_client_initial_cb(ngtcp2_conn *conn, + const ngtcp2_cid *dcid, + void *user_data); + +/** + * @function + * + * `ngtcp2_crypto_read_write_crypto_data` reads CRYPTO data |data| of + * length |datalen| in encryption level |crypto_level| and may feed + * outgoing CRYPTO data to |conn|. This function can drive handshake. + * This function can be also used after handshake completes. It is + * allowed to call this function with |datalen| == 0. In this case, + * no additional read operation is done. + * + * This function returns 0 if it succeeds, or a negative error code. + * The generic error code is -1 if a specific error code is not + * suitable. The error codes less than -10000 are specific to + * underlying TLS implementation. For OpenSSL, the error codes are + * defined in *ngtcp2_crypto_openssl.h*. + */ +NGTCP2_EXTERN int +ngtcp2_crypto_read_write_crypto_data(ngtcp2_conn *conn, + ngtcp2_crypto_level crypto_level, + const uint8_t *data, size_t datalen); + +/** + * @function + * + * `ngtcp2_crypto_recv_crypto_data_cb` is a wrapper function around + * `ngtcp2_crypto_read_write_crypto_data`. It can be directly passed + * to :member:`ngtcp2_callbacks.recv_crypto_data` field. + * + * If this function is used, the TLS implementation specific error + * codes described in `ngtcp2_crypto_read_write_crypto_data` are + * treated as if it returns -1. Do not use this function if an + * application wishes to use the TLS implementation specific error + * codes. + */ +NGTCP2_EXTERN int ngtcp2_crypto_recv_crypto_data_cb( + ngtcp2_conn *conn, ngtcp2_crypto_level crypto_level, uint64_t offset, + const uint8_t *data, size_t datalen, void *user_data); + +/** + * @function + * + * `ngtcp2_crypto_generate_stateless_reset_token` generates a + * stateless reset token using HKDF extraction using the given |cid| + * and static key |secret| as input. The token will be written to + * the buffer pointed by |token| and it must have a capacity of at + * least :macro:`NGTCP2_STATELESS_RESET_TOKENLEN` bytes. + * + * This function returns 0 if it succeeds, or -1. + */ +NGTCP2_EXTERN int ngtcp2_crypto_generate_stateless_reset_token( + uint8_t *token, const uint8_t *secret, size_t secretlen, + const ngtcp2_cid *cid); + +/** + * @macro + * + * :macro:`NGTCP2_CRYPTO_TOKEN_RAND_DATALEN` is the length of random + * data added to a token generated by + * `ngtcp2_crypto_generate_retry_token` or + * `ngtcp2_crypto_generate_regular_token`. + */ +#define NGTCP2_CRYPTO_TOKEN_RAND_DATALEN 32 + +/** + * @macro + * + * :macro:`NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY` is the magic byte for + * Retry token generated by `ngtcp2_crypto_generate_retry_token`. + */ +#define NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY 0xb6 + +/** + * @macro + * + * :macro:`NGTCP2_CRYPTO_TOKEN_MAGIC_REGULAR` is the magic byte for a + * token generated by `ngtcp2_crypto_generate_regular_token`. + */ +#define NGTCP2_CRYPTO_TOKEN_MAGIC_REGULAR 0x36 + +/** + * @macro + * + * :macro:`NGTCP2_CRYPTO_MAX_RETRY_TOKENLEN` is the maximum length of + * a token generated by `ngtcp2_crypto_generate_retry_token`. + */ +#define NGTCP2_CRYPTO_MAX_RETRY_TOKENLEN \ + (/* magic = */ 1 + /* cid len = */ 1 + NGTCP2_MAX_CIDLEN + \ + sizeof(ngtcp2_tstamp) + /* aead tag = */ 16 + \ + NGTCP2_CRYPTO_TOKEN_RAND_DATALEN) + +/** + * @macro + * + * :macro:`NGTCP2_CRYPTO_MAX_REGULAR_TOKENLEN` is the maximum length + * of a token generated by `ngtcp2_crypto_generate_regular_token`. + */ +#define NGTCP2_CRYPTO_MAX_REGULAR_TOKENLEN \ + (/* magic = */ 1 + sizeof(ngtcp2_tstamp) + /* aead tag = */ 16 + \ + NGTCP2_CRYPTO_TOKEN_RAND_DATALEN) + +/** + * @function + * + * `ngtcp2_crypto_generate_retry_token` generates a token in the + * buffer pointed by |token| that is sent with Retry packet. The + * buffer pointed by |token| must have at least + * :macro:`NGTCP2_CRYPTO_MAX_RETRY_TOKENLEN` bytes long. The + * successfully generated token starts with + * :macro:`NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY`. |secret| of length + * |secretlen| is an initial keying material to generate keys to + * encrypt the token. |version| is QUIC version. |remote_addr| of + * length |remote_addrlen| is an address of client. |retry_scid| is a + * Source Connection ID chosen by server and set in Retry packet. + * |odcid| is a Destination Connection ID in Initial packet sent by + * client. |ts| is the timestamp when the token is generated. + * + * This function returns the length of generated token if it succeeds, + * or -1. + */ +NGTCP2_EXTERN ngtcp2_ssize ngtcp2_crypto_generate_retry_token( + uint8_t *token, const uint8_t *secret, size_t secretlen, uint32_t version, + const ngtcp2_sockaddr *remote_addr, ngtcp2_socklen remote_addrlen, + const ngtcp2_cid *retry_scid, const ngtcp2_cid *odcid, ngtcp2_tstamp ts); + +/** + * @function + * + * `ngtcp2_crypto_verify_retry_token` verifies Retry token stored in + * the buffer pointed by |token| of length |tokenlen|. |secret| of + * length |secretlen| is an initial keying material to generate keys + * to decrypt the token. |version| is QUIC version of the Initial + * packet that contains this token. |remote_addr| of length + * |remote_addrlen| is an address of client. |dcid| is a Destination + * Connection ID in Initial packet sent by client. |timeout| is the + * period during which the token is valid. |ts| is the current + * timestamp. When validation succeeds, the extracted Destination + * Connection ID (which is the Destination Connection ID in Initial + * packet sent by client that triggered Retry packet) is stored to the + * buffer pointed by |odcid|. + * + * This function returns 0 if it succeeds, or -1. + */ +NGTCP2_EXTERN int ngtcp2_crypto_verify_retry_token( + ngtcp2_cid *odcid, const uint8_t *token, size_t tokenlen, + const uint8_t *secret, size_t secretlen, uint32_t version, + const ngtcp2_sockaddr *remote_addr, ngtcp2_socklen remote_addrlen, + const ngtcp2_cid *dcid, ngtcp2_duration timeout, ngtcp2_tstamp ts); + +/** + * @function + * + * `ngtcp2_crypto_generate_regular_token` generates a token in the + * buffer pointed by |token| that is sent with NEW_TOKEN frame. The + * buffer pointed by |token| must have at least + * :macro:`NGTCP2_CRYPTO_MAX_REGULAR_TOKENLEN` bytes long. The + * successfully generated token starts with + * :macro:`NGTCP2_CRYPTO_TOKEN_MAGIC_REGULAR`. |secret| of length + * |secretlen| is an initial keying material to generate keys to + * encrypt the token. |remote_addr| of length |remote_addrlen| is an + * address of client. |ts| is the timestamp when the token is + * generated. + * + * This function returns the length of generated token if it succeeds, + * or -1. + */ +NGTCP2_EXTERN ngtcp2_ssize ngtcp2_crypto_generate_regular_token( + uint8_t *token, const uint8_t *secret, size_t secretlen, + const ngtcp2_sockaddr *remote_addr, ngtcp2_socklen remote_addrlen, + ngtcp2_tstamp ts); + +/** + * @function + * + * `ngtcp2_crypto_verify_regular_token` verifies a regular token + * stored in the buffer pointed by |token| of length |tokenlen|. + * |secret| of length |secretlen| is an initial keying material to + * generate keys to decrypt the token. |remote_addr| of length + * |remote_addrlen| is an address of client. |timeout| is the period + * during which the token is valid. |ts| is the current timestamp. + * + * This function returns 0 if it succeeds, or -1. + */ +NGTCP2_EXTERN int ngtcp2_crypto_verify_regular_token( + const uint8_t *token, size_t tokenlen, const uint8_t *secret, + size_t secretlen, const ngtcp2_sockaddr *remote_addr, + ngtcp2_socklen remote_addrlen, ngtcp2_duration timeout, ngtcp2_tstamp ts); + +/** + * @function + * + * `ngtcp2_crypto_write_connection_close` writes Initial packet + * containing CONNECTION_CLOSE with the given |error_code| and the + * optional |reason| of length |reasonlen| to the buffer pointed by + * |dest| of length |destlen|. This function is designed for server + * to close connection without committing the state when validating + * Retry token fails. This function must not be used by client. The + * |dcid| must be the Source Connection ID in Initial packet from + * client. The |scid| must be the Destination Connection ID in + * Initial packet from client. |scid| is used to derive initial + * keying materials. + * + * This function wraps around `ngtcp2_pkt_write_connection_close` for + * easier use. + * + * This function returns 0 if it succeeds, or -1. + */ +NGTCP2_EXTERN ngtcp2_ssize ngtcp2_crypto_write_connection_close( + uint8_t *dest, size_t destlen, uint32_t version, const ngtcp2_cid *dcid, + const ngtcp2_cid *scid, uint64_t error_code, const uint8_t *reason, + size_t reasonlen); + +/** + * @function + * + * `ngtcp2_crypto_write_retry` writes Retry packet to the buffer + * pointed by |dest| of length |destlen|. |odcid| specifies Original + * Destination Connection ID. |token| specifies Retry Token, and + * |tokenlen| specifies its length. + * + * This function wraps around `ngtcp2_pkt_write_retry` for easier use. + * + * This function returns 0 if it succeeds, or -1. + */ +NGTCP2_EXTERN ngtcp2_ssize ngtcp2_crypto_write_retry( + uint8_t *dest, size_t destlen, uint32_t version, const ngtcp2_cid *dcid, + const ngtcp2_cid *scid, const ngtcp2_cid *odcid, const uint8_t *token, + size_t tokenlen); + +/** + * @function + * + * `ngtcp2_crypto_aead_ctx_encrypt_init` initializes |aead_ctx| with + * new AEAD cipher context object for encryption which is constructed + * to use |key| as encryption key. |aead| specifies AEAD cipher to + * use. |noncelen| is the length of nonce. + * + * This function returns 0 if it succeeds, or -1. + */ +NGTCP2_EXTERN int +ngtcp2_crypto_aead_ctx_encrypt_init(ngtcp2_crypto_aead_ctx *aead_ctx, + const ngtcp2_crypto_aead *aead, + const uint8_t *key, size_t noncelen); + +/** + * @function + * + * `ngtcp2_crypto_aead_ctx_decrypt_init` initializes |aead_ctx| with + * new AEAD cipher context object for decryption which is constructed + * to use |key| as encryption key. |aead| specifies AEAD cipher to + * use. |noncelen| is the length of nonce. + * + * This function returns 0 if it succeeds, or -1. + */ +NGTCP2_EXTERN int +ngtcp2_crypto_aead_ctx_decrypt_init(ngtcp2_crypto_aead_ctx *aead_ctx, + const ngtcp2_crypto_aead *aead, + const uint8_t *key, size_t noncelen); + +/** + * @function + * + * `ngtcp2_crypto_aead_ctx_free` frees up resources used by + * |aead_ctx|. This function does not free the memory pointed by + * |aead_ctx| itself. + */ +NGTCP2_EXTERN void +ngtcp2_crypto_aead_ctx_free(ngtcp2_crypto_aead_ctx *aead_ctx); + +/** + * @function + * + * `ngtcp2_crypto_delete_crypto_aead_ctx_cb` deletes the given |aead_ctx|. + * + * This function can be directly passed to + * :member:`ngtcp2_callbacks.delete_crypto_aead_ctx` field. + */ +NGTCP2_EXTERN void ngtcp2_crypto_delete_crypto_aead_ctx_cb( + ngtcp2_conn *conn, ngtcp2_crypto_aead_ctx *aead_ctx, void *user_data); + +/** + * @function + * + * `ngtcp2_crypto_delete_crypto_cipher_ctx_cb` deletes the given + * |cipher_ctx|. + * + * This function can be directly passed to + * :member:`ngtcp2_callbacks.delete_crypto_cipher_ctx` field. + */ +NGTCP2_EXTERN void ngtcp2_crypto_delete_crypto_cipher_ctx_cb( + ngtcp2_conn *conn, ngtcp2_crypto_cipher_ctx *cipher_ctx, void *user_data); + +/** + * @function + * + * `ngtcp2_crypto_get_path_challenge_data_cb` writes unpredictable + * sequence of :macro:`NGTCP2_PATH_CHALLENGE_DATALEN` bytes to |data| + * which is sent with PATH_CHALLENGE frame. + * + * This function can be directly passed to + * :member:`ngtcp2_callbacks.get_path_challenge_data` field. + */ +NGTCP2_EXTERN int ngtcp2_crypto_get_path_challenge_data_cb(ngtcp2_conn *conn, + uint8_t *data, + void *user_data); + +/** + * @function + * + * `ngtcp2_crypto_version_negotiation_cb` installs Initial keys for + * |version| which is negotiated or being negotiated. |client_dcid| + * is the destination connection ID in first Initial packet of client. + * + * This function can be directly passed to + * :member:`ngtcp2_callbacks.version_negotiation` field. + */ +NGTCP2_EXTERN int +ngtcp2_crypto_version_negotiation_cb(ngtcp2_conn *conn, uint32_t version, + const ngtcp2_cid *client_dcid, + void *user_data); + +typedef struct ngtcp2_crypto_conn_ref ngtcp2_crypto_conn_ref; + +/** + * @functypedef + * + * :type:`ngtcp2_crypto_get_conn` is a callback function to get a + * pointer to :type:`ngtcp2_conn` from |conn_ref|. The implementation + * must return non-NULL :type:`ngtcp2_conn` object. + */ +typedef ngtcp2_conn *(*ngtcp2_crypto_get_conn)( + ngtcp2_crypto_conn_ref *conn_ref); + +/** + * @struct + * + * :type:`ngtcp2_crypto_conn_ref` is a structure to get a pointer to + * :type:`ngtcp2_conn`. It is meant to be set to TLS native handle as + * an application specific data (e.g. SSL_set_app_data in OpenSSL). + */ +typedef struct ngtcp2_crypto_conn_ref { + /** + * :member:`get_conn` is a callback function to get a pointer to + * :type:`ngtcp2_conn` object. + */ + ngtcp2_crypto_get_conn get_conn; + /** + * :member:`user_data` is a pointer to arbitrary user data. + */ + void *user_data; +} ngtcp2_crypto_conn_ref; + +#ifdef __cplusplus +} +#endif + +#endif /* NGTCP2_CRYPTO_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/ngtcp2_crypto_gnutls.h b/src/contrib/libngtcp2/ngtcp2/ngtcp2_crypto_gnutls.h new file mode 100644 index 0000000..af5503f --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/ngtcp2_crypto_gnutls.h @@ -0,0 +1,107 @@ +/* + * ngtcp2 + * + * Copyright (c) 2020 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef NGTCP2_CRYPTO_GNUTLS_H +#define NGTCP2_CRYPTO_GNUTLS_H + +#include <ngtcp2/ngtcp2.h> + +#include <gnutls/gnutls.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @function + * + * `ngtcp2_crypto_gnutls_from_gnutls_record_encryption_level` + * translates |gtls_level| to :type:`ngtcp2_crypto_level`. This + * function is only available for GnuTLS backend. + */ +NGTCP2_EXTERN ngtcp2_crypto_level +ngtcp2_crypto_gnutls_from_gnutls_record_encryption_level( + gnutls_record_encryption_level_t gtls_level); + +/** + * @function + * + * `ngtcp2_crypto_gnutls_from_ngtcp2_crypto_level` translates + * |crypto_level| to gnutls_record_encryption_level_t. This function + * is only available for GnuTLS backend. + */ +NGTCP2_EXTERN gnutls_record_encryption_level_t +ngtcp2_crypto_gnutls_from_ngtcp2_level(ngtcp2_crypto_level crypto_level); + +/** + * @function + * + * `ngtcp2_crypto_gnutls_configure_server_session` configures + * |session| for server side QUIC connection. It performs the + * following modifications: + * + * - Set gnutls_handshake_set_secret_function. + * - Set gnutls_handshake_set_read_function. + * - Set gnutls_alert_set_read_function. + * - Register a TLS extension handler for QUIC Transport Parameters. + * + * Application must set a pointer to :type:`ngtcp2_crypto_conn_ref` to + * gnutls_session_t object by calling gnutls_session_set_ptr, and + * :type:`ngtcp2_crypto_conn_ref` object must have + * :member:`ngtcp2_crypto_conn_ref.get_conn` field assigned to get + * :type:`ngtcp2_conn`. + * + * It returns 0 if it succeeds, or -1. + */ +NGTCP2_EXTERN int +ngtcp2_crypto_gnutls_configure_server_session(gnutls_session_t session); + +/** + * @function + * + * `ngtcp2_crypto_gnutls_configure_client_session` configures + * |session| for client side QUIC connection. It performs the + * following modifications: + * + * - Set gnutls_handshake_set_secret_function. + * - Set gnutls_handshake_set_read_function. + * - Set gnutls_alert_set_read_function. + * - Register a TLS extension handler for QUIC Transport Parameters. + * + * Application must set a pointer to :type:`ngtcp2_crypto_conn_ref` to + * gnutls_session_t object by calling gnutls_session_set_ptr, and + * :type:`ngtcp2_crypto_conn_ref` object must have + * :member:`ngtcp2_crypto_conn_ref.get_conn` field assigned to get + * :type:`ngtcp2_conn`. + * + * It returns 0 if it succeeds, or -1. + */ +NGTCP2_EXTERN int +ngtcp2_crypto_gnutls_configure_client_session(gnutls_session_t session); + +#ifdef __cplusplus +} +#endif + +#endif /* NGTCP2_CRYPTO_GNUTLS_H */ diff --git a/src/contrib/libngtcp2/ngtcp2/version.h b/src/contrib/libngtcp2/ngtcp2/version.h new file mode 100644 index 0000000..54b2035 --- /dev/null +++ b/src/contrib/libngtcp2/ngtcp2/version.h @@ -0,0 +1,51 @@ +/* + * ngtcp2 + * + * Copyright (c) 2016 ngtcp2 contributors + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef VERSION_H +#define VERSION_H + +/** + * @macrosection + * + * Library version macros + */ + +/** + * @macro + * + * Version number of the ngtcp2 library release. + */ +#define NGTCP2_VERSION "0.13.1" + +/** + * @macro + * + * Numerical representation of the version number of the ngtcp2 + * library release. This is a 24 bit number with 8 bits for major + * number, 8 bits for minor and 8 bits for patch. Version 1.2.3 + * becomes 0x010203. + */ +#define NGTCP2_VERSION_NUM 0x000d01 + +#endif /* VERSION_H */ diff --git a/src/contrib/licenses/0BSD b/src/contrib/licenses/0BSD new file mode 100644 index 0000000..56c5528 --- /dev/null +++ b/src/contrib/licenses/0BSD @@ -0,0 +1,12 @@ +Permission to use, copy, modify, and distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + diff --git a/src/contrib/licenses/BSD-3-Clause b/src/contrib/licenses/BSD-3-Clause new file mode 100644 index 0000000..8041f21 --- /dev/null +++ b/src/contrib/licenses/BSD-3-Clause @@ -0,0 +1,21 @@ +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/src/contrib/licenses/LGPL-2.0 b/src/contrib/licenses/LGPL-2.0 new file mode 100644 index 0000000..d159169 --- /dev/null +++ b/src/contrib/licenses/LGPL-2.0 @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/src/contrib/licenses/LGPL-2.1 b/src/contrib/licenses/LGPL-2.1 new file mode 100644 index 0000000..27bb434 --- /dev/null +++ b/src/contrib/licenses/LGPL-2.1 @@ -0,0 +1,503 @@ +Valid-License-Identifier: LGPL-2.1 +Valid-License-Identifier: LGPL-2.1+ +SPDX-URL: https://spdx.org/licenses/LGPL-2.1.html +Usage-Guide: + To use this license in source code, put one of the following SPDX + tag/value pairs into a comment according to the placement + guidelines in the licensing rules documentation. + For 'GNU Lesser General Public License (LGPL) version 2.1 only' use: + SPDX-License-Identifier: LGPL-2.1 + For 'GNU Lesser General Public License (LGPL) version 2.1 or any later + version' use: + SPDX-License-Identifier: LGPL-2.1+ +License-Text: + +GNU LESSER GENERAL PUBLIC LICENSE +Version 2.1, February 1999 + +Copyright (C) 1991, 1999 Free Software Foundation, Inc. +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts as +the successor of the GNU Library Public License, version 2, hence the +version number 2.1.] + +Preamble + +The licenses for most software are designed to take away your freedom to +share and change it. By contrast, the GNU General Public Licenses are +intended to guarantee your freedom to share and change free software--to +make sure the software is free for all its users. + +This license, the Lesser General Public License, applies to some specially +designated software packages--typically libraries--of the Free Software +Foundation and other authors who decide to use it. You can use it too, but +we suggest you first think carefully about whether this license or the +ordinary General Public License is the better strategy to use in any +particular case, based on the explanations below. + +When we speak of free software, we are referring to freedom of use, not +price. Our General Public Licenses are designed to make sure that you have +the freedom to distribute copies of free software (and charge for this +service if you wish); that you receive source code or can get it if you +want it; that you can change the software and use pieces of it in new free +programs; and that you are informed that you can do these things. + +To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for you if +you distribute copies of the library or if you modify it. + +For example, if you distribute copies of the library, whether gratis or for +a fee, you must give the recipients all the rights that we gave you. You +must make sure that they, too, receive or can get the source code. If you +link other code with the library, you must provide complete object files to +the recipients, so that they can relink them with the library after making +changes to the library and recompiling it. And you must show them these +terms so they know their rights. + +We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + +To protect each distributor, we want to make it very clear that there is no +warranty for the free library. Also, if the library is modified by someone +else and passed on, the recipients should know that what they have is not +the original version, so that the original author's reputation will not be +affected by problems that might be introduced by others. + +Finally, software patents pose a constant threat to the existence of any +free program. We wish to make sure that a company cannot effectively +restrict the users of a free program by obtaining a restrictive license +from a patent holder. Therefore, we insist that any patent license obtained +for a version of the library must be consistent with the full freedom of +use specified in this license. + +Most GNU software, including some libraries, is covered by the ordinary GNU +General Public License. This license, the GNU Lesser General Public +License, applies to certain designated libraries, and is quite different +from the ordinary General Public License. We use this license for certain +libraries in order to permit linking those libraries into non-free +programs. + +When a program is linked with a library, whether statically or using a +shared library, the combination of the two is legally speaking a combined +work, a derivative of the original library. The ordinary General Public +License therefore permits such linking only if the entire combination fits +its criteria of freedom. The Lesser General Public License permits more lax +criteria for linking other code with the library. + +We call this license the "Lesser" General Public License because it does +Less to protect the user's freedom than the ordinary General Public +License. It also provides other free software developers Less of an +advantage over competing non-free programs. These disadvantages are the +reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + +For example, on rare occasions, there may be a special need to encourage +the widest possible use of a certain library, so that it becomes a de-facto +standard. To achieve this, non-free programs must be allowed to use the +library. A more frequent case is that a free library does the same job as +widely used non-free libraries. In this case, there is little to gain by +limiting the free library to free software only, so we use the Lesser +General Public License. + +In other cases, permission to use a particular library in non-free programs +enables a greater number of people to use a large body of free +software. For example, permission to use the GNU C Library in non-free +programs enables many more people to use the whole GNU operating system, as +well as its variant, the GNU/Linux operating system. + +Although the Lesser General Public License is Less protective of the users' +freedom, it does ensure that the user of a program that is linked with the +Library has the freedom and the wherewithal to run that program using a +modified version of the Library. + +The precise terms and conditions for copying, distribution and modification +follow. Pay close attention to the difference between a "work based on the +library" and a "work that uses the library". The former contains code +derived from the library, whereas the latter must be combined with the +library in order to run. + +TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + +0. This License Agreement applies to any software library or other program + which contains a notice placed by the copyright holder or other + authorized party saying it may be distributed under the terms of this + Lesser General Public License (also called "this License"). Each + licensee is addressed as "you". + + A "library" means a collection of software functions and/or data + prepared so as to be conveniently linked with application programs + (which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work which + has been distributed under these terms. A "work based on the Library" + means either the Library or any derivative work under copyright law: + that is to say, a work containing the Library or a portion of it, either + verbatim or with modifications and/or translated straightforwardly into + another language. (Hereinafter, translation is included without + limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for making + modifications to it. For a library, complete source code means all the + source code for all modules it contains, plus any associated interface + definition files, plus the scripts used to control compilation and + installation of the library. + + Activities other than copying, distribution and modification are not + covered by this License; they are outside its scope. The act of running + a program using the Library is not restricted, and output from such a + program is covered only if its contents constitute a work based on the + Library (independent of the use of the Library in a tool for writing + it). Whether that is true depends on what the Library does and what the + program that uses the Library does. + +1. You may copy and distribute verbatim copies of the Library's complete + source code as you receive it, in any medium, provided that you + conspicuously and appropriately publish on each copy an appropriate + copyright notice and disclaimer of warranty; keep intact all the notices + that refer to this License and to the absence of any warranty; and + distribute a copy of this License along with the Library. + + You may charge a fee for the physical act of transferring a copy, and + you may at your option offer warranty protection in exchange for a fee. + +2. You may modify your copy or copies of the Library or any portion of it, + thus forming a work based on the Library, and copy and distribute such + modifications or work under the terms of Section 1 above, provided that + you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices stating + that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no charge to + all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a table + of data to be supplied by an application program that uses the + facility, other than as an argument passed when the facility is + invoked, then you must make a good faith effort to ensure that, in + the event an application does not supply such function or table, the + facility still operates, and performs whatever part of its purpose + remains meaningful. + + (For example, a function in a library to compute square roots has a + purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must be + optional: if the application does not supply it, the square root + function must still compute square roots.) + + These requirements apply to the modified work as a whole. If + identifiable sections of that work are not derived from the Library, and + can be reasonably considered independent and separate works in + themselves, then this License, and its terms, do not apply to those + sections when you distribute them as separate works. But when you + distribute the same sections as part of a whole which is a work based on + the Library, the distribution of the whole must be on the terms of this + License, whose permissions for other licensees extend to the entire + whole, and thus to each and every part regardless of who wrote it. + + Thus, it is not the intent of this section to claim rights or contest + your rights to work written entirely by you; rather, the intent is to + exercise the right to control the distribution of derivative or + collective works based on the Library. + + In addition, mere aggregation of another work not based on the Library + with the Library (or with a work based on the Library) on a volume of a + storage or distribution medium does not bring the other work under the + scope of this License. + +3. You may opt to apply the terms of the ordinary GNU General Public + License instead of this License to a given copy of the Library. To do + this, you must alter all the notices that refer to this License, so that + they refer to the ordinary GNU General Public License, version 2, + instead of to this License. (If a newer version than version 2 of the + ordinary GNU General Public License has appeared, then you can specify + that version instead if you wish.) Do not make any other change in these + notices. + + Once this change is made in a given copy, it is irreversible for that + copy, so the ordinary GNU General Public License applies to all + subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of the + Library into a program that is not a library. + +4. You may copy and distribute the Library (or a portion or derivative of + it, under Section 2) in object code or executable form under the terms + of Sections 1 and 2 above provided that you accompany it with the + complete corresponding machine-readable source code, which must be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange. + + If distribution of object code is made by offering access to copy from a + designated place, then offering equivalent access to copy the source + code from the same place satisfies the requirement to distribute the + source code, even though third parties are not compelled to copy the + source along with the object code. + +5. A program that contains no derivative of any portion of the Library, but + is designed to work with the Library by being compiled or linked with + it, is called a "work that uses the Library". Such a work, in isolation, + is not a derivative work of the Library, and therefore falls outside the + scope of this License. + + However, linking a "work that uses the Library" with the Library creates + an executable that is a derivative of the Library (because it contains + portions of the Library), rather than a "work that uses the + library". The executable is therefore covered by this License. Section 6 + states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file + that is part of the Library, the object code for the work may be a + derivative work of the Library even though the source code is + not. Whether this is true is especially significant if the work can be + linked without the Library, or if the work is itself a library. The + threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data structure + layouts and accessors, and small macros and small inline functions (ten + lines or less in length), then the use of the object file is + unrestricted, regardless of whether it is legally a derivative + work. (Executables containing this object code plus portions of the + Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may + distribute the object code for the work under the terms of Section + 6. Any executables containing that work also fall under Section 6, + whether or not they are linked directly with the Library itself. + +6. As an exception to the Sections above, you may also combine or link a + "work that uses the Library" with the Library to produce a work + containing portions of the Library, and distribute that work under terms + of your choice, provided that the terms permit modification of the work + for the customer's own use and reverse engineering for debugging such + modifications. + + You must give prominent notice with each copy of the work that the + Library is used in it and that the Library and its use are covered by + this License. You must supply a copy of this License. If the work during + execution displays copyright notices, you must include the copyright + notice for the Library among them, as well as a reference directing the + user to the copy of this License. Also, you must do one of these things: + + a) Accompany the work with the complete corresponding machine-readable + source code for the Library including whatever changes were used in + the work (which must be distributed under Sections 1 and 2 above); + and, if the work is an executable linked with the Library, with the + complete machine-readable "work that uses the Library", as object + code and/or source code, so that the user can modify the Library and + then relink to produce a modified executable containing the modified + Library. (It is understood that the user who changes the contents of + definitions files in the Library will not necessarily be able to + recompile the application to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a copy + of the library already present on the user's computer system, rather + than copying library functions into the executable, and (2) will + operate properly with a modified version of the library, if the user + installs one, as long as the modified version is interface-compatible + with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at least three + years, to give the same user the materials specified in Subsection + 6a, above, for a charge no more than the cost of performing this + distribution. + + d) If distribution of the work is made by offering access to copy from a + designated place, offer equivalent access to copy the above specified + materials from the same place. + + e) Verify that the user has already received a copy of these materials + or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the Library" + must include any data and utility programs needed for reproducing the + executable from it. However, as a special exception, the materials to be + distributed need not include anything that is normally distributed (in + either source or binary form) with the major components (compiler, + kernel, and so on) of the operating system on which the executable runs, + unless that component itself accompanies the executable. + + It may happen that this requirement contradicts the license restrictions + of other proprietary libraries that do not normally accompany the + operating system. Such a contradiction means you cannot use both them + and the Library together in an executable that you distribute. + +7. You may place library facilities that are a work based on the Library + side-by-side in a single library together with other library facilities + not covered by this License, and distribute such a combined library, + provided that the separate distribution of the work based on the Library + and of the other library facilities is otherwise permitted, and provided + that you do these two things: + + a) Accompany the combined library with a copy of the same work based on + the Library, uncombined with any other library facilities. This must + be distributed under the terms of the Sections above. + + b) Give prominent notice with the combined library of the fact that part + of it is a work based on the Library, and explaining where to find + the accompanying uncombined form of the same work. + +8. You may not copy, modify, sublicense, link with, or distribute the + Library except as expressly provided under this License. Any attempt + otherwise to copy, modify, sublicense, link with, or distribute the + Library is void, and will automatically terminate your rights under this + License. However, parties who have received copies, or rights, from you + under this License will not have their licenses terminated so long as + such parties remain in full compliance. + +9. You are not required to accept this License, since you have not signed + it. However, nothing else grants you permission to modify or distribute + the Library or its derivative works. These actions are prohibited by law + if you do not accept this License. Therefore, by modifying or + distributing the Library (or any work based on the Library), you + indicate your acceptance of this License to do so, and all its terms and + conditions for copying, distributing or modifying the Library or works + based on it. + +10. Each time you redistribute the Library (or any work based on the + Library), the recipient automatically receives a license from the + original licensor to copy, distribute, link with or modify the Library + subject to these terms and conditions. You may not impose any further + restrictions on the recipients' exercise of the rights granted + herein. You are not responsible for enforcing compliance by third + parties with this License. + +11. If, as a consequence of a court judgment or allegation of patent + infringement or for any other reason (not limited to patent issues), + conditions are imposed on you (whether by court order, agreement or + otherwise) that contradict the conditions of this License, they do not + excuse you from the conditions of this License. If you cannot + distribute so as to satisfy simultaneously your obligations under this + License and any other pertinent obligations, then as a consequence you + may not distribute the Library at all. For example, if a patent license + would not permit royalty-free redistribution of the Library by all + those who receive copies directly or indirectly through you, then the + only way you could satisfy both it and this License would be to refrain + entirely from distribution of the Library. + + If any portion of this section is held invalid or unenforceable under + any particular circumstance, the balance of the section is intended to + apply, and the section as a whole is intended to apply in other + circumstances. + + It is not the purpose of this section to induce you to infringe any + patents or other property right claims or to contest validity of any + such claims; this section has the sole purpose of protecting the + integrity of the free software distribution system which is implemented + by public license practices. Many people have made generous + contributions to the wide range of software distributed through that + system in reliance on consistent application of that system; it is up + to the author/donor to decide if he or she is willing to distribute + software through any other system and a licensee cannot impose that + choice. + + This section is intended to make thoroughly clear what is believed to + be a consequence of the rest of this License. + +12. If the distribution and/or use of the Library is restricted in certain + countries either by patents or by copyrighted interfaces, the original + copyright holder who places the Library under this License may add an + explicit geographical distribution limitation excluding those + countries, so that distribution is permitted only in or among countries + not thus excluded. In such case, this License incorporates the + limitation as if written in the body of this License. + +13. The Free Software Foundation may publish revised and/or new versions of + the Lesser General Public License from time to time. Such new versions + will be similar in spirit to the present version, but may differ in + detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the Library + specifies a version number of this License which applies to it and "any + later version", you have the option of following the terms and + conditions either of that version or of any later version published by + the Free Software Foundation. If the Library does not specify a license + version number, you may choose any version ever published by the Free + Software Foundation. + +14. If you wish to incorporate parts of the Library into other free + programs whose distribution conditions are incompatible with these, + write to the author to ask for permission. For software which is + copyrighted by the Free Software Foundation, write to the Free Software + Foundation; we sometimes make exceptions for this. Our decision will be + guided by the two goals of preserving the free status of all + derivatives of our free software and of promoting the sharing and reuse + of software generally. + +NO WARRANTY + +15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY + FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN + OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES + PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER + EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE + ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH + YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL + NECESSARY SERVICING, REPAIR OR CORRECTION. + +16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING + WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR + REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR + DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL + DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY + (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED + INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF + THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR + OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +END OF TERMS AND CONDITIONS + +How to Apply These Terms to Your New Libraries + +If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + +To apply these terms, attach the following notices to the library. It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + +one line to give the library's name and an idea of what it does. +Copyright (C) year name of author + +This library is free software; you can redistribute it and/or modify it +under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at +your option) any later version. + +This library is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +for more details. + +You should have received a copy of the GNU Lesser General Public License +along with this library; if not, write to the Free Software Foundation, +Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Also add +information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + +Yoyodyne, Inc., hereby disclaims all copyright interest in +the library `Frob' (a library for tweaking knobs) written +by James Random Hacker. + +signature of Ty Coon, 1 April 1990 +Ty Coon, President of Vice +That's all there is to it! diff --git a/src/contrib/licenses/MIT b/src/contrib/licenses/MIT new file mode 100644 index 0000000..fb122a0 --- /dev/null +++ b/src/contrib/licenses/MIT @@ -0,0 +1,19 @@ +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/src/contrib/macros.h b/src/contrib/macros.h new file mode 100644 index 0000000..78df47e --- /dev/null +++ b/src/contrib/macros.h @@ -0,0 +1,41 @@ +/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/*! + * \brief Common macros. + */ + +#pragma once + +#ifndef MIN +/*! \brief Type-safe minimum macro. */ +#define MIN(a, b) \ + ({ __typeof__ (a) _a = (a); __typeof__ (b) _b = (b); _a < _b ? _a : _b; }) + +/*! \brief Type-safe maximum macro. */ +#define MAX(a, b) \ + ({ __typeof__ (a) _a = (a); __typeof__ (b) _b = (b); _a > _b ? _a : _b; }) +#endif + +#ifndef likely +/*! \brief Optimize for x to be true value. */ +#define likely(x) __builtin_expect((x), 1) +#endif + +#ifndef unlikely +/*! \brief Optimize for x to be false value. */ +#define unlikely(x) __builtin_expect((x), 0) +#endif diff --git a/src/contrib/mempattern.c b/src/contrib/mempattern.c new file mode 100644 index 0000000..f57139d --- /dev/null +++ b/src/contrib/mempattern.c @@ -0,0 +1,122 @@ +/* Copyright (C) 2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <stdlib.h> + +#include "contrib/mempattern.h" +#include "contrib/string.h" +#include "contrib/ucw/mempool.h" + +static void mm_nofree(void *p) +{ + /* nop */ +} + +static void *mm_malloc(void *ctx, size_t n) +{ + (void)ctx; + return malloc(n); +} + +void *mm_alloc(knot_mm_t *mm, size_t size) +{ + if (mm) { + return mm->alloc(mm->ctx, size); + } else { + return malloc(size); + } +} + +void *mm_calloc(knot_mm_t *mm, size_t nmemb, size_t size) +{ + if (nmemb == 0 || size == 0) { + return NULL; + } + if (mm) { + size_t total_size = nmemb * size; + if (total_size / nmemb != size) { // Overflow check + return NULL; + } + void *mem = mm_alloc(mm, total_size); + if (mem == NULL) { + return NULL; + } + return memzero(mem, total_size); + } else { + return calloc(nmemb, size); + } +} + +void *mm_realloc(knot_mm_t *mm, void *what, size_t size, size_t prev_size) +{ + if (mm) { + void *p = mm->alloc(mm->ctx, size); + if (p == NULL) { + return NULL; + } else { + if (what) { + memcpy(p, what, + prev_size < size ? prev_size : size); + } + mm_free(mm, what); + return p; + } + } else { + return realloc(what, size); + } +} + +char *mm_strdup(knot_mm_t *mm, const char *s) +{ + if (s == NULL) { + return NULL; + } + if (mm) { + size_t len = strlen(s) + 1; + void *mem = mm_alloc(mm, len); + if (mem == NULL) { + return NULL; + } + return memcpy(mem, s, len); + } else { + return strdup(s); + } +} + +void mm_free(knot_mm_t *mm, void *what) +{ + if (mm) { + if (mm->free) { + mm->free(what); + } + } else { + free(what); + } +} + +void mm_ctx_init(knot_mm_t *mm) +{ + mm->ctx = NULL; + mm->alloc = mm_malloc; + mm->free = free; +} + +void mm_ctx_mempool(knot_mm_t *mm, size_t chunk_size) +{ + mm->ctx = mp_new(chunk_size); + mm->alloc = (knot_mm_alloc_t)mp_alloc; + mm->free = mm_nofree; +} diff --git a/src/contrib/mempattern.h b/src/contrib/mempattern.h new file mode 100644 index 0000000..c74f983 --- /dev/null +++ b/src/contrib/mempattern.h @@ -0,0 +1,47 @@ +/* Copyright (C) 2018 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/*! + * \brief Memory allocation related functions. + */ + +#pragma once + +#include "libknot/mm_ctx.h" + +/*! \brief Default memory block size. */ +#define MM_DEFAULT_BLKSIZE 4096 + +/*! \brief Allocs using 'mm' if any, uses system malloc() otherwise. */ +void *mm_alloc(knot_mm_t *mm, size_t size); + +/*! \brief Callocs using 'mm' if any, uses system calloc() otherwise. */ +void *mm_calloc(knot_mm_t *mm, size_t nmemb, size_t size); + +/*! \brief Reallocs using 'mm' if any, uses system realloc() otherwise. */ +void *mm_realloc(knot_mm_t *mm, void *what, size_t size, size_t prev_size); + +/*! \brief Strdups using 'mm' if any, uses system strdup() otherwise. */ +char *mm_strdup(knot_mm_t *mm, const char *s); + +/*! \brief Free using 'mm' if any, uses system free() otherwise. */ +void mm_free(knot_mm_t *mm, void *what); + +/*! \brief Initialize default memory allocation context. */ +void mm_ctx_init(knot_mm_t *mm); + +/*! \brief Memory pool context. */ +void mm_ctx_mempool(knot_mm_t *mm, size_t chunk_size); diff --git a/src/contrib/musl/inet_ntop.c b/src/contrib/musl/inet_ntop.c new file mode 100644 index 0000000..1b72a61 --- /dev/null +++ b/src/contrib/musl/inet_ntop.c @@ -0,0 +1,79 @@ +/* + * Copyright © 2005-2020 Rich Felker, et al. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <arpa/inet.h> +#include <errno.h> +#include <stdio.h> +#include <string.h> + +#include "contrib/musl/inet_ntop.h" +#include "contrib/openbsd/strlcpy.h" + +const char *knot_inet_ntop(int af, const void *restrict a0, char *restrict s, socklen_t l) +{ + const unsigned char *a = a0; + int i, j, max, best; + char buf[100]; + + switch (af) { + case AF_INET: + if (snprintf(s, l, "%d.%d.%d.%d", a[0],a[1],a[2],a[3]) < l) + return s; + break; + case AF_INET6: + if (memcmp(a, "\0\0\0\0\0\0\0\0\0\0\377\377", 12)) + (void)snprintf(buf, sizeof buf, + "%x:%x:%x:%x:%x:%x:%x:%x", + 256*a[0]+a[1],256*a[2]+a[3], + 256*a[4]+a[5],256*a[6]+a[7], + 256*a[8]+a[9],256*a[10]+a[11], + 256*a[12]+a[13],256*a[14]+a[15]); + else + (void)snprintf(buf, sizeof buf, + "%x:%x:%x:%x:%x:%x:%d.%d.%d.%d", + 256*a[0]+a[1],256*a[2]+a[3], + 256*a[4]+a[5],256*a[6]+a[7], + 256*a[8]+a[9],256*a[10]+a[11], + a[12],a[13],a[14],a[15]); + /* Replace longest /(^0|:)[:0]{2,}/ with "::" */ + for (i=best=0, max=2; buf[i]; i++) { + if (i && buf[i] != ':') continue; + j = strspn(buf+i, ":0"); + if (j>max) best=i, max=j; + } + if (max>3) { + buf[best] = buf[best+1] = ':'; + memmove(buf+best+2, buf+best+max, i-best-max+1); + } + if (strlen(buf) < l) { + strlcpy(s, buf, l); + return s; + } + break; + default: + errno = EAFNOSUPPORT; + return 0; + } + errno = ENOSPC; + return 0; +} diff --git a/src/contrib/musl/inet_ntop.h b/src/contrib/musl/inet_ntop.h new file mode 100644 index 0000000..73faaee --- /dev/null +++ b/src/contrib/musl/inet_ntop.h @@ -0,0 +1,28 @@ +/* + * Copyright © 2005-2020 Rich Felker, et al. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#pragma once + +#include <sys/socket.h> + +const char *knot_inet_ntop(int af, const void *restrict a0, char *restrict s, socklen_t l); diff --git a/src/contrib/net.c b/src/contrib/net.c new file mode 100644 index 0000000..3de4a71 --- /dev/null +++ b/src/contrib/net.c @@ -0,0 +1,747 @@ +/* Copyright (C) 2023 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <sys/types.h> // OpenBSD +#include <netinet/tcp.h> // TCP_FASTOPEN +#include <netinet/in.h> +#include <poll.h> +#include <stdbool.h> +#include <sys/socket.h> +#include <sys/uio.h> +#include <unistd.h> + +#include "libknot/errcode.h" +#include "contrib/macros.h" +#include "contrib/net.h" +#include "contrib/sockaddr.h" +#include "contrib/time.h" + +/*! + * \brief Enable socket option. + */ +static int sockopt_enable(int sock, int level, int optname) +{ + const int enable = 1; + if (setsockopt(sock, level, optname, &enable, sizeof(enable)) != 0) { + return knot_map_errno(); + } + + return KNOT_EOK; +} + +/*! + * \brief Create a non-blocking socket. + * + * Prefer SOCK_NONBLOCK if available to save one fcntl() syscall. + * + */ +static int socket_create(int family, int type, int proto) +{ +#ifdef SOCK_NONBLOCK + type |= SOCK_NONBLOCK; +#endif + int sock = socket(family, type, proto); + if (sock < 0) { + return knot_map_errno(); + } + +#ifndef SOCK_NONBLOCK + if (fcntl(sock, F_SETFL, O_NONBLOCK) != 0) { + int ret = knot_map_errno(); + close(sock); + return ret; + } +#endif + +/* + * OS X doesn't support MSG_NOSIGNAL. Use SO_NOSIGPIPE socket option instead. + */ +#if defined(__APPLE__) && !defined(MSG_NOSIGNAL) +#define MSG_NOSIGNAL 0 + int ret = sockopt_enable(sock, SOL_SOCKET, SO_NOSIGPIPE); + if (ret != KNOT_EOK) { + return ret; + } +#endif + + return sock; +} + +int net_unbound_socket(int type, const struct sockaddr_storage *addr) +{ + if (addr == NULL) { + return KNOT_EINVAL; + } + + /* Create socket. */ + return socket_create(addr->ss_family, type, 0); +} + +struct option { + int level; + int name; +}; + +/*! + * \brief Get setsock option for binding non-local address. + */ +static const struct option *nonlocal_option(int family) +{ + static const struct option ipv4 = { + #if defined(IP_FREEBIND) + IPPROTO_IP, IP_FREEBIND + #elif defined(IP_BINDANY) + IPPROTO_IP, IP_BINDANY + #else + 0, 0 + #endif + }; + + static const struct option ipv6 = { + #if defined(IP_FREEBIND) + IPPROTO_IP, IP_FREEBIND + #elif defined(IPV6_BINDANY) + IPPROTO_IPV6, IPV6_BINDANY + #else + 0, 0 + #endif + + }; + + switch (family) { + case AF_INET: return &ipv4; + case AF_INET6: return &ipv6; + default: + return NULL; + } +} + +static int enable_nonlocal(int sock, int family) +{ + const struct option *opt = nonlocal_option(family); + if (opt == NULL || opt->name == 0) { + return KNOT_ENOTSUP; + } + + return sockopt_enable(sock, opt->level, opt->name); +} + +static int enable_reuseport(int sock) +{ +#ifdef ENABLE_REUSEPORT +# if defined(__FreeBSD__) + return sockopt_enable(sock, SOL_SOCKET, SO_REUSEPORT_LB); +# else + return sockopt_enable(sock, SOL_SOCKET, SO_REUSEPORT); +# endif +#else + return KNOT_ENOTSUP; +#endif +} + +static void unlink_unix_socket(const struct sockaddr_storage *addr) +{ + char path[SOCKADDR_STRLEN] = { 0 }; + sockaddr_tostr(path, sizeof(path), addr); + unlink(path); +} + +int net_bound_socket(int type, const struct sockaddr_storage *addr, + net_bind_flag_t flags, mode_t unix_mode) +{ + /* Create socket. */ + int sock = net_unbound_socket(type, addr); + if (sock < 0) { + return sock; + } + + /* Unlink UNIX sock if exists. */ + if (addr->ss_family == AF_UNIX) { + unlink_unix_socket(addr); + } + + /* Reuse old address if taken. */ + int ret = sockopt_enable(sock, SOL_SOCKET, SO_REUSEADDR); + if (ret != KNOT_EOK) { + close(sock); + return ret; + } + +#if defined(__linux__) + /* Set MSS (Maximum Segment Size) limit. */ + if (addr->ss_family != AF_UNIX && type == SOCK_STREAM) { + const int mss = KNOT_TCP_MSS; + if (setsockopt(sock, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) != 0) { + ret = knot_map_errno(); + close(sock); + return ret; + } + } +#endif + + /* Don't bind IPv4 for IPv6 any address. */ + if (addr->ss_family == AF_INET6) { + ret = sockopt_enable(sock, IPPROTO_IPV6, IPV6_V6ONLY); + if (ret != KNOT_EOK) { + close(sock); + return ret; + } + } + + /* Allow bind to non-local address. */ + if (flags & NET_BIND_NONLOCAL) { + ret = enable_nonlocal(sock, addr->ss_family); + if (ret != KNOT_EOK) { + close(sock); + return ret; + } + } + + /* Allow to bind the same address by multiple threads. */ + if (flags & NET_BIND_MULTIPLE) { + ret = enable_reuseport(sock); + if (ret != KNOT_EOK) { + close(sock); + return ret; + } + } + + /* Bind to specified address. */ + ret = bind(sock, (const struct sockaddr *)addr, sockaddr_len(addr)); + if (ret < 0) { + ret = knot_map_errno(); + close(sock); + return ret; + } + + if (addr->ss_family == AF_UNIX && unix_mode != 0) { + const char *path = ((const struct sockaddr_un *)addr)->sun_path; + if (chmod(path, unix_mode) != 0) { + ret = knot_map_errno(); + close(sock); + return ret; + } + } + + return sock; +} + +static int tfo_connect(int sock, const struct sockaddr_storage *addr) +{ +#if defined(__linux__) + /* connect() will be called implicitly with sendmsg(). */ + return KNOT_EOK; +#elif defined(__FreeBSD__) + return sockopt_enable(sock, IPPROTO_TCP, TCP_FASTOPEN); +#elif defined(__APPLE__) + /* Connection is performed lazily when first data is sent. */ + sa_endpoints_t ep = { + .sae_dstaddr = (const struct sockaddr *)addr, + .sae_dstaddrlen = sockaddr_len(addr) + }; + int flags = CONNECT_DATA_IDEMPOTENT | CONNECT_RESUME_ON_READ_WRITE; + + int ret = connectx(sock, &ep, SAE_ASSOCID_ANY, flags, NULL, 0, NULL, NULL); + return (ret == 0 ? KNOT_EOK : knot_map_errno()); +#else + return KNOT_ENOTSUP; +#endif +} + +int net_connected_socket(int type, const struct sockaddr_storage *dst_addr, + const struct sockaddr_storage *src_addr, bool tfo) +{ + if (dst_addr == NULL) { + return KNOT_EINVAL; + } + + /* Check port. */ + if (sockaddr_port(dst_addr) == 0) { + return KNOT_NET_EADDR; + } + + /* Bind to specific source address - if set. */ + int sock = -1; + if (src_addr && src_addr->ss_family != AF_UNSPEC) { + sock = net_bound_socket(type, src_addr, 0, 0); + } else { + sock = net_unbound_socket(type, dst_addr); + } + if (sock < 0) { + return sock; + } + + /* Connect to destination. */ + if (tfo && net_is_stream(sock)) { + int ret = tfo_connect(sock, dst_addr); + if (ret != KNOT_EOK) { + close(sock); + return ret; + } + } else { + int ret = connect(sock, (const struct sockaddr *)dst_addr, + sockaddr_len(dst_addr)); + if (ret != 0 && errno != EINPROGRESS) { + ret = knot_map_errno(); + close(sock); + return ret; + } + } + + return sock; +} + +int net_bound_tfo(int sock, int backlog) +{ +#if defined(TCP_FASTOPEN) +#if defined(__APPLE__) + if (backlog > 0) { + backlog = 1; // just on-off switch on macOS + } +#endif + if (setsockopt(sock, IPPROTO_TCP, TCP_FASTOPEN, &backlog, sizeof(backlog)) != 0) { + return knot_map_errno(); + } + + return KNOT_EOK; +#endif + return KNOT_ENOTSUP; +} + +bool net_is_connected(int sock) +{ + struct sockaddr_storage addr; + socklen_t len = sizeof(addr); + return (getpeername(sock, (struct sockaddr *)&addr, &len) == 0); +} + +int net_socktype(int sock) +{ + int type; + socklen_t size = sizeof(type); + + if (getsockopt(sock, SOL_SOCKET, SO_TYPE, &type, &size) == 0) { + return type; + } else { + return AF_UNSPEC; + } +} + +bool net_is_stream(int sock) +{ + return net_socktype(sock) == SOCK_STREAM; +} + +int net_accept(int sock, struct sockaddr_storage *addr) +{ + socklen_t len = sizeof(*addr); + socklen_t *addr_len = (addr != NULL) ? &len : NULL; + + int remote = -1; + +#if defined(HAVE_ACCEPT4) && defined(SOCK_NONBLOCK) + remote = accept4(sock, (struct sockaddr *)addr, addr_len, SOCK_NONBLOCK); + if (remote < 0) { + return knot_map_errno(); + } +#else + remote = accept(sock, (struct sockaddr *)addr, addr_len); + if (fcntl(remote, F_SETFL, O_NONBLOCK) != 0) { + int error = knot_map_errno(); + close(remote); + return error; + } +#endif + + return remote; +} + +void net_reset(int sock) +{ + struct sockaddr unspec = { .sa_family = AF_UNSPEC }; + (void)connect(sock, &unspec, sizeof(unspec)); +} + +/* -- I/O interface handling partial -------------------------------------- */ + +/*! + * \brief Perform \a poll() on one socket. + */ +static int poll_one(int fd, int events, int timeout_ms) +{ + struct pollfd pfd = { + .fd = fd, + .events = events + }; + + return poll(&pfd, 1, timeout_ms); +} + +/*! + * \brief Check if we should wait for I/O readiness. + * + * \param error \a errno set by the failed I/O operation. + */ +static bool io_should_wait(int error) +{ + if (error == EAGAIN || error == EWOULDBLOCK || /* Socket data not ready. */ + error == ENOMEM || error == ENOBUFS) { /* Insufficient resources. */ + return true; + } + +#ifndef __linux__ + /* FreeBSD: connection in progress. */ + if (error == ENOTCONN) { + return true; + } +#endif + + return false; +} + +/*! + * \brief Check if we should wait again. + * + * \param error \a errno set by the failed wait operation. + */ +static bool wait_should_retry(int error) +{ + if (error == EINTR || /* System call interrupted. */ + error == EAGAIN || error == ENOMEM) { /* Insufficient resources. */ + return true; + } + return false; +} + +/*! + * \brief I/O operation callbacks. + */ +struct io { + ssize_t (*process)(int sockfd, struct msghdr *msg, int timeout_ms); + int (*wait)(int sockfd, int timeout_ms); +}; + +/*! + * \brief Get total size of I/O vector in a message. + */ +static size_t msg_iov_len(const struct msghdr *msg) +{ + size_t total = 0; + + for (int i = 0; i < msg->msg_iovlen; i++) { + total += msg->msg_iov[i].iov_len; + } + + return total; +} + +/*! + * \brief Shift processed data out of message IO vectors. + */ +static void msg_iov_shift(struct msghdr *msg, size_t done) +{ + struct iovec *iov = msg->msg_iov; + int iovlen = msg->msg_iovlen; + + for (int i = 0; i < iovlen && done > 0; i++) { + if (iov[i].iov_len > done) { + iov[i].iov_base += done; + iov[i].iov_len -= done; + done = 0; + } else { + done -= iov[i].iov_len; + msg->msg_iov += 1; + msg->msg_iovlen -= 1; + } + } + + assert(done == 0); +} + +#define TIMEOUT_CTX_INIT \ + struct timespec begin, end; \ + if (*timeout_ptr > 0) { \ + clock_gettime(CLOCK_MONOTONIC, &begin); \ + } + +#define TIMEOUT_CTX_UPDATE \ + if (*timeout_ptr > 0) { \ + clock_gettime(CLOCK_MONOTONIC, &end); \ + int running_ms = time_diff_ms(&begin, &end); \ + *timeout_ptr = MAX(*timeout_ptr - running_ms, 0); \ + } + +/*! + * \brief Perform an I/O operation with a socket with waiting. + * + * \param oneshot If set, doesn't wait until the buffer is fully processed. + */ +static ssize_t io_exec(const struct io *io, int fd, struct msghdr *msg, + bool oneshot, int *timeout_ptr) +{ + size_t done = 0; + size_t total = msg_iov_len(msg); + + for (;;) { + /* Perform I/O. */ + ssize_t ret = io->process(fd, msg, *timeout_ptr); + if (ret == -1 && errno == EINTR) { + continue; + } + if (ret > 0) { + done += ret; + if (oneshot || done == total) { + break; + } + msg_iov_shift(msg, ret); + } + + /* Wait for data readiness. */ + if (ret > 0 || (ret == -1 && io_should_wait(errno))) { + for (;;) { + TIMEOUT_CTX_INIT + + ret = io->wait(fd, *timeout_ptr); + + if (ret == 1) { + TIMEOUT_CTX_UPDATE + /* Ready, retry process. */ + break; + } else if (ret == -1 && wait_should_retry(errno)) { + TIMEOUT_CTX_UPDATE + /* Interrupted or transient error, continue waiting. */ + continue; + } else if (ret == 0) { + /* Timed out, exit. */ + return KNOT_ETIMEOUT; + } else { + /* In specific circumstances with Valgrind, + * poll() returns wrong value. + */ + assert(ret <= 1); + assert(ret >= -1); + /* Other error, exit. */ + return KNOT_ECONN; + } + } + } else { + /* Disconnect or error. */ + return KNOT_ECONN; + } + } + + return done; +} + +static ssize_t recv_process(int fd, struct msghdr *msg, int timeout_ms) +{ + return recvmsg(fd, msg, MSG_DONTWAIT | MSG_NOSIGNAL); +} + +static int recv_wait(int fd, int timeout_ms) +{ + return poll_one(fd, POLLIN, timeout_ms); +} + +static ssize_t recv_data(int sock, struct msghdr *msg, bool oneshot, int *timeout_ptr) +{ + static const struct io RECV_IO = { + .process = recv_process, + .wait = recv_wait + }; + + return io_exec(&RECV_IO, sock, msg, oneshot, timeout_ptr); +} + +static ssize_t send_process_tfo(int fd, struct msghdr *msg, int timeout_ms) +{ +#if defined(__linux__) + int ret = sendmsg(fd, msg, MSG_FASTOPEN); + if (ret != 0 && errno == EINPROGRESS) { + if (poll_one(fd, POLLOUT, timeout_ms) != 1) { + errno = ETIMEDOUT; + return -1; + } + ret = sendmsg(fd, msg, MSG_NOSIGNAL); + } + return ret; +#else + return sendmsg(fd, msg, MSG_NOSIGNAL); +#endif +} + +static ssize_t send_process(int fd, struct msghdr *msg, int timeout_ms) +{ + return sendmsg(fd, msg, MSG_NOSIGNAL); +} + +static int send_wait(int fd, int timeout_ms) +{ + return poll_one(fd, POLLOUT, timeout_ms); +} + +static ssize_t send_data(int sock, struct msghdr *msg, int *timeout_ptr, bool tfo) +{ + static const struct io SEND_IO = { + .process = send_process, + .wait = send_wait + }; + static const struct io SEND_IO_TFO = { + .process = send_process_tfo, + .wait = send_wait + }; + + return io_exec(tfo ? &SEND_IO_TFO : &SEND_IO, sock, msg, false, timeout_ptr); +} + +/* -- generic stream and datagram I/O -------------------------------------- */ + +ssize_t net_base_send(int sock, const uint8_t *buffer, size_t size, + const struct sockaddr_storage *addr, int timeout_ms) +{ + if (sock < 0 || buffer == NULL) { + return KNOT_EINVAL; + } + + struct iovec iov = { 0 }; + iov.iov_base = (void *)buffer; + iov.iov_len = size; + + struct msghdr msg = { 0 }; + msg.msg_name = (void *)addr; + msg.msg_namelen = sockaddr_len(addr); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + int ret = send_data(sock, &msg, &timeout_ms, false); + if (ret < 0) { + return ret; + } else if (ret != size) { + return KNOT_ECONN; + } + + return ret; +} + +ssize_t net_base_recv(int sock, uint8_t *buffer, size_t size, + struct sockaddr_storage *addr, int timeout_ms) +{ + if (sock < 0 || buffer == NULL) { + return KNOT_EINVAL; + } + + struct iovec iov = { 0 }; + iov.iov_base = buffer; + iov.iov_len = size; + + struct msghdr msg = { 0 }; + msg.msg_name = (void *)addr; + msg.msg_namelen = addr ? sizeof(*addr) : 0; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + return recv_data(sock, &msg, true, &timeout_ms); +} + +ssize_t net_dgram_send(int sock, const uint8_t *buffer, size_t size, + const struct sockaddr_storage *addr) +{ + return net_base_send(sock, buffer, size, addr, 0); +} + +ssize_t net_dgram_recv(int sock, uint8_t *buffer, size_t size, int timeout_ms) +{ + return net_base_recv(sock, buffer, size, NULL, timeout_ms); +} + +ssize_t net_stream_send(int sock, const uint8_t *buffer, size_t size, int timeout_ms) +{ + return net_base_send(sock, buffer, size, NULL, timeout_ms); +} + +ssize_t net_stream_recv(int sock, uint8_t *buffer, size_t size, int timeout_ms) +{ + return net_base_recv(sock, buffer, size, NULL, timeout_ms); +} + +/* -- DNS specific I/O ----------------------------------------------------- */ + +ssize_t net_dns_tcp_send(int sock, const uint8_t *buffer, size_t size, int timeout_ms, + struct sockaddr_storage *tfo_addr) +{ + if (sock < 0 || buffer == NULL || size > UINT16_MAX) { + return KNOT_EINVAL; + } + + struct iovec iov[2]; + uint16_t pktsize = htons(size); + iov[0].iov_base = &pktsize; + iov[0].iov_len = sizeof(uint16_t); + iov[1].iov_base = (void *)buffer; + iov[1].iov_len = size; + + struct msghdr msg = { 0 }; + msg.msg_iov = iov; + msg.msg_iovlen = 2; + msg.msg_name = (void *)tfo_addr; + msg.msg_namelen = tfo_addr ? sizeof(*tfo_addr) : 0; + + ssize_t ret = send_data(sock, &msg, &timeout_ms, tfo_addr != NULL); + if (ret < 0) { + return ret; + } + + return size; /* Do not count the size prefix. */ +} + +ssize_t net_dns_tcp_recv(int sock, uint8_t *buffer, size_t size, int timeout_ms) +{ + if (sock < 0 || buffer == NULL) { + return KNOT_EINVAL; + } + + uint16_t pktsize = 0; + + struct iovec iov = { + .iov_base = &pktsize, + .iov_len = sizeof(pktsize) + }; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1 + }; + + /* Receive size. */ + int ret = recv_data(sock, &msg, false, &timeout_ms); + if (ret != sizeof(pktsize)) { + return ret; + } + pktsize = ntohs(pktsize); + + /* Check packet size */ + if (size < pktsize) { + return KNOT_ESPACE; + } + + /* Receive payload. */ + iov.iov_base = buffer; + iov.iov_len = pktsize; + + return recv_data(sock, &msg, false, &timeout_ms); +} diff --git a/src/contrib/net.h b/src/contrib/net.h new file mode 100644 index 0000000..85a8bd6 --- /dev/null +++ b/src/contrib/net.h @@ -0,0 +1,209 @@ +/* Copyright (C) 2023 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#pragma once + +#include <stdbool.h> +#include <stdint.h> +#include <sys/socket.h> +#include <sys/stat.h> + +// 1280 (IPv6 minimum link MTU) - 40 (IPv6 fixed header) - 20 (TCP fixed header) +#define KNOT_TCP_MSS 1220 + +/*! + * \brief Network interface flags. + */ +typedef enum { + NET_BIND_NONLOCAL = (1 << 0), //!< Allow to bind unavailable address. + NET_BIND_MULTIPLE = (1 << 1), //!< Allow to bind address multiple times. +} net_bind_flag_t; + +/*! + * \brief Create unbound socket of given family and type. + * + * \note The socket is set to non-blocking mode. + * + * \param type Socket transport type (SOCK_STREAM, SOCK_DGRAM). + * \param addr Socket address. + * + * \return socket or error code + */ +int net_unbound_socket(int type, const struct sockaddr_storage *addr); + +/*! + * \brief Create socket bound to given address. + * + * The socket is set to non-blocking mode. + * + * \param type Socket transport type (SOCK_STREAM, SOCK_DGRAM). + * \param addr Socket address. + * \param flags Socket binding options. + * \param unix_mode Socket file mode (UNIX socket only). 0 means don't set mode. + * + * \return socket or error code + */ +int net_bound_socket(int type, const struct sockaddr_storage *addr, + net_bind_flag_t flags, mode_t unix_mode); + +/*! + * \brief Create socket connected (asynchronously) to destination address. + * + * \note The socket is set to non-blocking mode. + * + * \param type Socket transport type (SOCK_STREAM, SOCK_DGRAM). + * \param dst_addr Destination address. + * \param src_addr Source address (can be NULL). + * \param tfo Enable TCP Fast Open. + * + * \return socket or error code + */ +int net_connected_socket(int type, const struct sockaddr_storage *dst_addr, + const struct sockaddr_storage *src_addr, bool tfo); + +/*! + * \brief Enables TCP Fast Open on a bound socket. + * + * \param sock Socket. + * + * \return KNOT_EOK or error code + */ +int net_bound_tfo(int sock, int backlog); + +/*! + * \brief Return true if the socket is fully connected. + * + * \param sock Socket. + * + * \return true if connected + */ +bool net_is_connected(int sock); + +/*! + * \brief Get socket type (e.g. \a SOCK_STREAM). + * + * \param sock Socket. + */ +int net_socktype(int sock); + +/*! + * \brief Check if socket is a SOCK_STREAM socket. + */ +bool net_is_stream(int sock); + +/*! + * \brief Accept a connection on a listening socket. + * + * \brief The socket is set to non-blocking mode. + * + * \param sock Socket + * \param addr Remote address (can be NULL). + * + * \return socket or error code + */ +int net_accept(int sock, struct sockaddr_storage *addr); + +/*! + * \brief Reset a TCP connection (with a RST packet). + */ +void net_reset(int sock); + +/*! + * \brief Send a message on a socket. + * + * The socket can be SOCK_STREAM or SOCK_DGRAM. + * + * The implementation handles partial-writes automatically. + * + * \param[in] sock Socket. + * \param[in] buffer Message buffer. + * \param[in] size Size of the message. + * \param[in] addr Remote address (ignored for SOCK_STREAM). + * \param[in] timeout_ms Write timeout in milliseconds (-1 for infinity, + * not valid for SOCK_DGRAM). + * + * \return Number of bytes sent or negative error code. + */ +ssize_t net_base_send(int sock, const uint8_t *buffer, size_t size, + const struct sockaddr_storage *addr, int timeout_ms); + +/*! + * \brief Receive a message from a socket. + * + * \param[in] sock Socket. + * \param[out] buffer Receiving buffer. + * \param[in] size Capacity of the receiving buffer. + * \param[out] addr Remote address (can be NULL). + * \param[in] timeout_ms Read timeout in milliseconds (-1 for infinity). + * + * \return Number of bytes read or negative error code. + */ +ssize_t net_base_recv(int sock, uint8_t *buffer, size_t size, + struct sockaddr_storage *addr, int timeout_ms); + +/*! + * \brief Send a message on a SOCK_DGRAM socket. + * + * \see net_base_send + */ +ssize_t net_dgram_send(int sock, const uint8_t *buffer, size_t size, + const struct sockaddr_storage *addr); + +/*! + * \brief Receive a message from a SOCK_DGRAM socket. + * + * \see net_base_recv + */ +ssize_t net_dgram_recv(int sock, uint8_t *buffer, size_t size, int timeout_ms); + +/*! + * \brief Send a message on a SOCK_STREAM socket. + * + * \see net_base_send + */ +ssize_t net_stream_send(int sock, const uint8_t *buffer, size_t size, int timeout_ms); + +/*! + * \brief Receive a message from a SOCK_STREAM socket. + * + * \see net_base_recv + */ +ssize_t net_stream_recv(int sock, uint8_t *buffer, size_t size, int timeout_ms); + +/*! + * \brief Send a DNS message on a TCP socket. + * + * The outgoing message is prefixed with a two-byte value carrying the DNS + * message size according to the specification. These two bytes are not + * reflected in the return value. + * + * \param[in] tfo_addr If not NULL, send using TCP Fast Open to this address. + * + * \see net_base_send + */ +ssize_t net_dns_tcp_send(int sock, const uint8_t *buffer, size_t size, int timeout_ms, + struct sockaddr_storage *tfo_addr); + +/*! + * \brief Receive a DNS message from a TCP socket. + * + * The first two bytes of the incoming message are interpreted as a DNS message + * size according to the specification. These two bytes are not included in + * the returned size. Only a complete DNS message is retrieved. + * + * \see net_base_recv + */ +ssize_t net_dns_tcp_recv(int sock, uint8_t *buffer, size_t size, int timeout_ms); diff --git a/src/contrib/openbsd/LICENSE b/src/contrib/openbsd/LICENSE new file mode 100644 index 0000000..e9a1aaa --- /dev/null +++ b/src/contrib/openbsd/LICENSE @@ -0,0 +1,2 @@ +../licenses/0BSD +../licenses/BSD-3-Clause
\ No newline at end of file diff --git a/src/contrib/openbsd/siphash.c b/src/contrib/openbsd/siphash.c new file mode 100644 index 0000000..26b8cfc --- /dev/null +++ b/src/contrib/openbsd/siphash.c @@ -0,0 +1,176 @@ +/* $OpenBSD: siphash.c,v 1.6 2017/04/12 17:41:49 deraadt Exp $ */ + +/*- + * Copyright (c) 2013 Andre Oppermann <andre@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * SipHash is a family of PRFs SipHash-c-d where the integer parameters c and d + * are the number of compression rounds and the number of finalization rounds. + * A compression round is identical to a finalization round and this round + * function is called SipRound. Given a 128-bit key k and a (possibly empty) + * byte string m, SipHash-c-d returns a 64-bit value SipHash-c-d(k; m). + * + * Implemented from the paper "SipHash: a fast short-input PRF", 2012.09.18, + * by Jean-Philippe Aumasson and Daniel J. Bernstein, + * Permanent Document ID b9a943a805fbfc6fde808af9fc0ecdfa + * https://131002.net/siphash/siphash.pdf + * https://131002.net/siphash/ + */ + +#include <string.h> + +#include "libknot/endian.h" +#include "contrib/string.h" +#include "contrib/openbsd/siphash.h" + +static void SipHash_CRounds(SIPHASH_CTX *, int); +static void SipHash_Rounds(SIPHASH_CTX *, int); + +void +SipHash_Init(SIPHASH_CTX *ctx, const SIPHASH_KEY *key) +{ + uint64_t k0, k1; + + k0 = le64toh(key->k0); + k1 = le64toh(key->k1); + + ctx->v[0] = 0x736f6d6570736575ULL ^ k0; + ctx->v[1] = 0x646f72616e646f6dULL ^ k1; + ctx->v[2] = 0x6c7967656e657261ULL ^ k0; + ctx->v[3] = 0x7465646279746573ULL ^ k1; + + memset(ctx->buf, 0, sizeof(ctx->buf)); + ctx->bytes = 0; +} + +void +SipHash_Update(SIPHASH_CTX *ctx, int rc, int rf, const void *src, size_t len) +{ + const uint8_t *ptr = src; + size_t left, used; + + if (len == 0) + return; + + used = ctx->bytes % sizeof(ctx->buf); + ctx->bytes += len; + + if (used > 0) { + left = sizeof(ctx->buf) - used; + + if (len >= left) { + memcpy(&ctx->buf[used], ptr, left); + SipHash_CRounds(ctx, rc); + len -= left; + ptr += left; + } else { + memcpy(&ctx->buf[used], ptr, len); + return; + } + } + + while (len >= sizeof(ctx->buf)) { + memcpy(ctx->buf, ptr, sizeof(ctx->buf)); + SipHash_CRounds(ctx, rc); + len -= sizeof(ctx->buf); + ptr += sizeof(ctx->buf); + } + + if (len > 0) + memcpy(&ctx->buf, ptr, len); +} + +uint64_t +SipHash_End(SIPHASH_CTX *ctx, int rc, int rf) +{ + uint64_t r; + size_t left, used; + + used = ctx->bytes % sizeof(ctx->buf); + left = sizeof(ctx->buf) - used; + memset(&ctx->buf[used], 0, left - 1); + ctx->buf[7] = ctx->bytes; + + SipHash_CRounds(ctx, rc); + ctx->v[2] ^= 0xff; + SipHash_Rounds(ctx, rf); + + r = (ctx->v[0] ^ ctx->v[1]) ^ (ctx->v[2] ^ ctx->v[3]); + memzero(ctx, sizeof(*ctx)); + return htole64(r); +} + +uint64_t +SipHash(const SIPHASH_KEY *key, int rc, int rf, const void *src, size_t len) +{ + SIPHASH_CTX ctx; + + SipHash_Init(&ctx, key); + SipHash_Update(&ctx, rc, rf, src, len); + return (SipHash_End(&ctx, rc, rf)); +} + +#define SIP_ROTL(x, b) ((x) << (b)) | ( (x) >> (64 - (b))) + +static void +SipHash_Rounds(SIPHASH_CTX *ctx, int rounds) +{ + while (rounds--) { + ctx->v[0] += ctx->v[1]; + ctx->v[2] += ctx->v[3]; + ctx->v[1] = SIP_ROTL(ctx->v[1], 13); + ctx->v[3] = SIP_ROTL(ctx->v[3], 16); + + ctx->v[1] ^= ctx->v[0]; + ctx->v[3] ^= ctx->v[2]; + ctx->v[0] = SIP_ROTL(ctx->v[0], 32); + + ctx->v[2] += ctx->v[1]; + ctx->v[0] += ctx->v[3]; + ctx->v[1] = SIP_ROTL(ctx->v[1], 17); + ctx->v[3] = SIP_ROTL(ctx->v[3], 21); + + ctx->v[1] ^= ctx->v[2]; + ctx->v[3] ^= ctx->v[0]; + ctx->v[2] = SIP_ROTL(ctx->v[2], 32); + } +} + +static void +SipHash_CRounds(SIPHASH_CTX *ctx, int rounds) +{ + uint64_t tmp; + + memcpy(&tmp, ctx->buf, sizeof(tmp)); + uint64_t m = le64toh(tmp); + + ctx->v[3] ^= m; + SipHash_Rounds(ctx, rounds); + ctx->v[0] ^= m; +} diff --git a/src/contrib/openbsd/siphash.h b/src/contrib/openbsd/siphash.h new file mode 100644 index 0000000..59d952d --- /dev/null +++ b/src/contrib/openbsd/siphash.h @@ -0,0 +1,83 @@ +/*- + * Copyright (c) 2013 Andre Oppermann <andre@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $OpenBSD: siphash.h,v 1.3 2015/02/20 11:51:03 tedu Exp $ + */ + +/* + * SipHash is a family of pseudorandom functions (a.k.a. keyed hash functions) + * optimized for speed on short messages returning a 64bit hash/digest value. + * + * The number of rounds is defined during the initialization: + * SipHash24_Init() for the fast and reasonable strong version + * SipHash48_Init() for the strong version (half as fast) + * + * struct SIPHASH_CTX ctx; + * SipHash24_Init(&ctx); + * SipHash_SetKey(&ctx, "16bytes long key"); + * SipHash_Update(&ctx, pointer_to_string, length_of_string); + * SipHash_End(&ctx); + */ + +#ifndef _SIPHASH_H_ +#define _SIPHASH_H_ + +#include <stddef.h> +#include <stdint.h> + +#define SIPHASH_BLOCK_LENGTH 8 +#define SIPHASH_KEY_LENGTH 16 +#define SIPHASH_DIGEST_LENGTH 8 + +typedef struct _SIPHASH_CTX { + uint64_t v[4]; + uint8_t buf[SIPHASH_BLOCK_LENGTH]; + uint32_t bytes; +} SIPHASH_CTX; + +typedef struct { + uint64_t k0; + uint64_t k1; +} SIPHASH_KEY; + +void SipHash_Init(SIPHASH_CTX *, const SIPHASH_KEY *); +void SipHash_Update(SIPHASH_CTX *, int, int, const void *, size_t); +uint64_t SipHash_End(SIPHASH_CTX *, int, int); +uint64_t SipHash(const SIPHASH_KEY *, int, int, const void *, size_t); + +#define SipHash24_Init(_c, _k) SipHash_Init((_c), (_k)) +#define SipHash24_Update(_c, _p, _l) SipHash_Update((_c), 2, 4, (_p), (_l)) +#define SipHash24_End(_d) SipHash_End((_d), 2, 4) +#define SipHash24(_k, _p, _l) SipHash((_k), 2, 4, (_p), (_l)) + +#define SipHash48_Init(_c, _k) SipHash_Init((_c), (_k)) +#define SipHash48_Update(_c, _p, _l) SipHash_Update((_c), 4, 8, (_p), (_l)) +#define SipHash48_End(_d) SipHash_End((_d), 4, 8) +#define SipHash48(_k, _p, _l) SipHash((_k), 4, 8, (_p), (_l)) + +#endif /* _SIPHASH_H_ */ diff --git a/src/contrib/openbsd/strlcat.c b/src/contrib/openbsd/strlcat.c new file mode 100644 index 0000000..1409062 --- /dev/null +++ b/src/contrib/openbsd/strlcat.c @@ -0,0 +1,48 @@ +/* + * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/types.h> +#include <string.h> + +#include "contrib/openbsd/strlcat.h" + +size_t +knot_strlcat(char *dst, const char *src, size_t siz) +{ + char *d = dst; + const char *s = src; + size_t n = siz; + size_t dlen; + + /* Find the end of dst and adjust bytes left but don't go past end */ + while (n-- != 0 && *d != '\0') + d++; + dlen = d - dst; + n = siz - dlen; + + if (n == 0) + return(dlen + strlen(s)); + while (*s != '\0') { + if (n != 1) { + *d++ = *s; + n--; + } + s++; + } + *d = '\0'; + + return(dlen + (s - src)); /* count does not include NUL */ +} diff --git a/src/contrib/openbsd/strlcat.h b/src/contrib/openbsd/strlcat.h new file mode 100644 index 0000000..7016069 --- /dev/null +++ b/src/contrib/openbsd/strlcat.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#pragma once + +#ifndef HAVE_STRLCAT +#define strlcat(dst, src, size) knot_strlcat(dst, src, size) +#endif + +/* + * Appends src to string dst of size siz (unlike strncat, siz is the + * full size of dst, not space left). At most siz-1 characters + * will be copied. Always NUL terminates (unless siz <= strlen(dst)). + * Returns strlen(src) + MIN(siz, strlen(initial dst)). + * If retval >= siz, truncation occurred. + */ +size_t +knot_strlcat(char *dst, const char *src, size_t siz); diff --git a/src/contrib/openbsd/strlcpy.c b/src/contrib/openbsd/strlcpy.c new file mode 100644 index 0000000..eafc0e4 --- /dev/null +++ b/src/contrib/openbsd/strlcpy.c @@ -0,0 +1,46 @@ +/* + * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/types.h> +#include <string.h> + +#include "contrib/openbsd/strlcpy.h" + +size_t +knot_strlcpy(char *dst, const char *src, size_t siz) +{ + char *d = dst; + const char *s = src; + size_t n = siz; + + /* Copy as many bytes as will fit */ + if (n != 0) { + while (--n != 0) { + if ((*d++ = *s++) == '\0') + break; + } + } + + /* Not enough room in dst, add NUL and traverse rest of src */ + if (n == 0) { + if (siz != 0) + *d = '\0'; /* NUL-terminate dst */ + while (*s++) + ; + } + + return(s - src - 1); /* count does not include NUL */ +} diff --git a/src/contrib/openbsd/strlcpy.h b/src/contrib/openbsd/strlcpy.h new file mode 100644 index 0000000..6421068 --- /dev/null +++ b/src/contrib/openbsd/strlcpy.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#pragma once + +#ifndef HAVE_STRLCPY +#define strlcpy(dst, src, size) knot_strlcpy(dst, src, size) +#endif + +/* + * Copy src to string dst of size siz. At most siz-1 characters + * will be copied. Always NUL terminates (unless siz == 0). + * Returns strlen(src); if retval >= siz, truncation occurred. + */ +size_t +knot_strlcpy(char *dst, const char *src, size_t siz); diff --git a/src/contrib/os.h b/src/contrib/os.h new file mode 100644 index 0000000..8d4a2e2 --- /dev/null +++ b/src/contrib/os.h @@ -0,0 +1,37 @@ +/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#pragma once + +#include <stdbool.h> +#include <stdio.h> +#include <sys/utsname.h> + +inline static bool linux_at_least(unsigned version_first, unsigned version_second) +{ +#if defined(__linux__) + struct utsname info; + unsigned first, second; + if (uname(&info) != 0 || sscanf(info.release, "%u.%u.", &first, &second) != 2) { + return false; + } else { + return first > version_first || + (first = version_first && second >= version_second); + } +#else + return false; +#endif +} diff --git a/src/contrib/proxyv2/proxyv2.c b/src/contrib/proxyv2/proxyv2.c new file mode 100644 index 0000000..0e5c90b --- /dev/null +++ b/src/contrib/proxyv2/proxyv2.c @@ -0,0 +1,281 @@ +/* Copyright (C) 2022 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + Copyright (C) 2021 Fastly, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <arpa/inet.h> +#include <stdint.h> +#include <string.h> + +#include "contrib/proxyv2/proxyv2.h" +#include "contrib/sockaddr.h" +#include "libknot/errcode.h" + +/* + * Minimal implementation of the haproxy PROXY v2 protocol. + * + * Supports extracting the original client address and client port number from + * the haproxy PROXY v2 protocol's address block. + * + * See https://www.haproxy.org/download/2.5/doc/proxy-protocol.txt for the + * protocol specification. + */ + +static const char PROXYV2_SIG[12] = "\x0D\x0A\x0D\x0A\x00\x0D\x0A\x51\x55\x49\x54\x0A"; + +/* + * The part of the PROXY v2 payload following the signature. + */ +struct proxyv2_hdr { + /* + * The protocol version and command. + * + * The upper four bits contain the version which must be \x2 and the + * receiver must only accept this value. + * + * The lower four bits represent the command, which is \x0 for LOCAL + * and \x1 for PROXY. + */ + uint8_t ver_cmd; + + /* + * The transport protocol and address family. The upper four bits + * contain the address family and the lower four bits contain the + * protocol. + * + * The relevant values for DNS are: + * \x11: TCP over IPv4 + * \x12: UDP over IPv4 + * \x21: TCP over IPv6 + * \x22: UDP over IPv6 + */ + uint8_t fam_addr; + + /* + * The number of PROXY v2 payload bytes following this header to skip + * to reach the proxied packet (i.e., start of the original DNS message). + */ + uint16_t len; +}; + +/* + * The PROXY v2 address block for IPv4. + */ +struct proxyv2_addr_ipv4 { + uint8_t src_addr[4]; + uint8_t dst_addr[4]; + uint16_t src_port; + uint16_t dst_port; +}; + +/* + * The PROXY v2 address block for IPv6. + */ +struct proxyv2_addr_ipv6 { + uint8_t src_addr[16]; + uint8_t dst_addr[16]; + uint16_t src_port; + uint16_t dst_port; +}; + +const size_t PROXYV2_HEADER_MAXLEN = sizeof(PROXYV2_SIG) + + sizeof(struct proxyv2_hdr) + + sizeof(struct proxyv2_addr_ipv6); + +/* + * Make sure the C compiler lays out the PROXY v2 address block structs so that + * they can be memcpy()'d off the wire. + */ +#if (__STDC_VERSION__ >= 201112L) +_Static_assert(sizeof(struct proxyv2_hdr) == 4, + "struct proxyv2_hdr is correct size"); +_Static_assert(sizeof(struct proxyv2_addr_ipv4) == 12, + "struct proxyv2_addr_ipv4 is correct size"); +_Static_assert(sizeof(struct proxyv2_addr_ipv6) == 36, + "struct proxyv2_addr_ipv6 is correct size"); +#endif + +int proxyv2_header_offset(void *base, size_t len_base) +{ + /* + * Check that 'base' has enough bytes to read the PROXY v2 signature + * and header, and if so whether the PROXY v2 signature is present. + */ + if (len_base < (sizeof(PROXYV2_SIG) + sizeof(struct proxyv2_hdr)) || + memcmp(base, PROXYV2_SIG, sizeof(PROXYV2_SIG)) != 0) + { + /* Failure. */ + return KNOT_EMALF; + } + + /* Read the PROXY v2 header. */ + struct proxyv2_hdr *hdr = base + sizeof(PROXYV2_SIG); + + /* + * Check that this is a version 2, command "PROXY" payload. + * + * XXX: The PROXY v2 spec mandates support for the "LOCAL" command + * (byte 0x20). + */ + if (hdr->ver_cmd != 0x21) { + /* Failure. */ + return KNOT_EMALF; + } + + /* + * Calculate the offset of the original DNS message inside the packet. + * This needs to account for the length of the PROXY v2 signature, + * PROXY v2 header, and the bytes of variable length PROXY v2 data + * following the PROXY v2 header. + */ + const size_t offset_dns = sizeof(PROXYV2_SIG) + + sizeof(struct proxyv2_hdr) + ntohs(hdr->len); + if (offset_dns < len_base) { + return offset_dns; + } + + return KNOT_EMALF; +} + +int proxyv2_addr_store(void *base, size_t len_base, struct sockaddr_storage *ss) +{ + /* + * Calculate the offset of the PROXY v2 address block. This is the data + * immediately following the PROXY v2 header. + */ + const size_t offset_proxy_addr = sizeof(PROXYV2_SIG) + + sizeof(struct proxyv2_hdr); + struct proxyv2_hdr *hdr = base + sizeof(PROXYV2_SIG); + + /* + * Handle proxied UDP-over-IPv4 and UDP-over-IPv6 packets. + */ + //TODO What about TCP? + if (hdr->fam_addr == 0x12) { + /* This is a proxied UDP-over-IPv4 packet. */ + struct proxyv2_addr_ipv4 *addr; + + /* + * Check that the packet is large enough to contain the IPv4 + * address block. + */ + if (offset_proxy_addr + sizeof(*addr) < len_base) { + /* Read the PROXY v2 address block. */ + addr = base + offset_proxy_addr; + + /* Copy the client's IPv4 address to the caller. */ + sockaddr_set_raw(ss, AF_INET, addr->src_addr, + sizeof(addr->src_addr)); + + /* Copy the client's port to the caller. */ + sockaddr_port_set(ss, ntohs(addr->src_port)); + + /* Success. */ + return KNOT_EOK; + } + } else if (hdr->fam_addr == 0x22) { + /* This is a proxied UDP-over-IPv6 packet. */ + struct proxyv2_addr_ipv6 *addr; + + /* + * Check that the packet is large enough to contain the IPv6 + * address block. + */ + if (offset_proxy_addr + sizeof(*addr) < len_base) { + /* Read the PROXY v2 address block. */ + addr = base + offset_proxy_addr; + + /* Copy the client's IPv6 address to the caller. */ + sockaddr_set_raw(ss, AF_INET6, addr->src_addr, + sizeof(addr->src_addr)); + + /* Copy the client's port to the caller. */ + sockaddr_port_set(ss, ntohs(addr->src_port)); + + /* Success. */ + return KNOT_EOK; + } + } + + /* Failure. */ + return KNOT_EMALF; +} + +int proxyv2_write_header(char *buf, size_t buflen, int socktype, const struct sockaddr *src, + const struct sockaddr *dst) +{ + if (buflen < PROXYV2_HEADER_MAXLEN) { + return KNOT_EINVAL; + } + + uint8_t fam_addr = 0; + int family = src->sa_family; + if (socktype == SOCK_DGRAM) { + fam_addr += 0x2; + } else if (socktype == SOCK_STREAM) { + fam_addr += 0x1; + } else { + return KNOT_EINVAL; + } + if (family == AF_INET) { + fam_addr += 0x10; + } else if (family == AF_INET6) { + fam_addr += 0x20; + } else { + return KNOT_EINVAL; + } + + struct proxyv2_hdr hdr = { + .ver_cmd = 0x21, + .fam_addr = fam_addr, + .len = (family == AF_INET) + ? htons(sizeof(struct proxyv2_addr_ipv4)) + : htons(sizeof(struct proxyv2_addr_ipv6)) + }; + + size_t offset = 0; + memcpy(buf, PROXYV2_SIG, sizeof(PROXYV2_SIG)); + offset += sizeof(PROXYV2_SIG); + memcpy(buf + offset, &hdr, sizeof(hdr)); + offset += sizeof(hdr); + + if (family == AF_INET) { + struct proxyv2_addr_ipv4 ipv4 = { 0 }; + struct sockaddr_in *p_src = (struct sockaddr_in *)src; + struct sockaddr_in *p_dst = (struct sockaddr_in *)dst; + memcpy(ipv4.src_addr, &p_src->sin_addr, sizeof(p_src->sin_addr)); + memcpy(ipv4.dst_addr, &p_dst->sin_addr, sizeof(p_dst->sin_addr)); + ipv4.src_port = p_src->sin_port; + ipv4.dst_port = p_dst->sin_port; + + // Store in buffer + memcpy(buf + offset, &ipv4, sizeof(ipv4)); + offset += sizeof(ipv4); + } else { + struct proxyv2_addr_ipv6 ipv6 = { 0 }; + struct sockaddr_in6 *p_src = (struct sockaddr_in6 *)src; + struct sockaddr_in6 *p_dst = (struct sockaddr_in6 *)dst; + memcpy(ipv6.src_addr, &p_src->sin6_addr, sizeof(p_src->sin6_addr)); + memcpy(ipv6.dst_addr, &p_dst->sin6_addr, sizeof(p_dst->sin6_addr)); + ipv6.src_port = p_src->sin6_port; + ipv6.dst_port = p_dst->sin6_port; + + // Store in buffer + memcpy(buf + offset, &ipv6, sizeof(ipv6)); + offset += sizeof(ipv6); + } + + return offset; +} diff --git a/src/contrib/proxyv2/proxyv2.h b/src/contrib/proxyv2/proxyv2.h new file mode 100644 index 0000000..8fd8299 --- /dev/null +++ b/src/contrib/proxyv2/proxyv2.h @@ -0,0 +1,29 @@ +/* Copyright (C) 2022 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#pragma once + +#include <stddef.h> +#include <sys/socket.h> + +extern const size_t PROXYV2_HEADER_MAXLEN; + +int proxyv2_header_offset(void *base, size_t len_base); + +int proxyv2_addr_store(void *base, size_t len_base, struct sockaddr_storage *ss); + +int proxyv2_write_header(char *buf, size_t buflen, int socktype, const struct sockaddr *src, + const struct sockaddr *dst); diff --git a/src/contrib/qp-trie/trie.c b/src/contrib/qp-trie/trie.c new file mode 100644 index 0000000..30dcb42 --- /dev/null +++ b/src/contrib/qp-trie/trie.c @@ -0,0 +1,1451 @@ +/* Copyright (C) 2019 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + Copyright (C) 2018 Tony Finch <dot@dotat.at> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + + The code originated from https://github.com/fanf2/qp/blob/master/qp.c + at revision 5f6d93753. + */ + +#include <assert.h> +#include <limits.h> +#include <stdlib.h> +#include <string.h> + +#include "contrib/qp-trie/trie.h" +#include "contrib/macros.h" +#include "contrib/mempattern.h" +#include "libknot/errcode.h" + +typedef unsigned int uint; +typedef uint64_t trie_index_t; /*!< nibble index into a key */ +typedef uint64_t word; /*!< A type-punned word */ +typedef uint bitmap_t; /*!< Bit-maps, using the range of 1<<0 to 1<<16 (inclusive). */ + +typedef char static_assert_pointer_fits_in_word + [sizeof(word) >= sizeof(uintptr_t) ? 1 : -1]; + +#define KEYLENBITS 31 + +/*! \brief trie keys have lengths + * + * 32 bits are enough for key lengths; probably even 16 bits would be. + * However, a 32 bit length means the alignment will be a multiple of + * 4, allowing us to stash the COW and BRANCH flags in the bottom bits + * of a pointer to a key. + * + * We need to steal a couple of bits from the length to keep the COW + * state of key allocations. + */ +typedef struct { + uint32_t cow:1, len:KEYLENBITS; + trie_key_t chars[]; +} tkey_t; + +/*! \brief A trie node is a pair of words. + * + * Each word is type-punned, depending on whether this is a branch + * node or a leaf node. We'll define some accessor functions to wrap + * this up into something reasonably safe. + * + * We aren't using a union to avoid problems with strict aliasing, and + * we aren't using bitfields because we want to control exactly which + * bits in the word are used by each field (in particular the flags). + * + * Branch nodes are never allocated individually: they are always part + * of either the root node or the twigs array of their parent branch. + * + * In a branch: + * + * `i` contains flags, bitmap, and index, explained in more detail below. + * + * `p` is a pointer to the "twigs", an array of child nodes. + * + * In a leaf: + * + * `i` is cast from a pointer to a tkey_t, with flags in the bottom bits. + * + * `p` is a trie_val_t. + */ +typedef struct node { + word i; + void *p; +} node_t; + +struct trie { + node_t root; // undefined when weight == 0, see empty_root() + size_t weight; + knot_mm_t mm; +}; + +/*! \brief size (in bits) of nibble (half-byte) indexes into keys + * + * The bottom bit is clear for the upper nibble, and set for the lower + * nibble, big-endian style, since the tree has to be in lexicographic + * order. The index increases from one branch node to the next as you + * go deeper into the trie. All the keys below a branch are identical + * up to the nibble identified by the branch. + * + * (see also tkey_t.len above) + */ +#define TWIDTH_INDEX 33 + +/*! \brief exclusive limit on indexes */ +#define TMAX_INDEX (BIG1 << TWIDTH_INDEX) + +/*! \brief size (in bits) of branch bitmap + * + * The bitmap indicates which subtries are present. The present child + * nodes are stored in the twigs array (with no holes between them). + * + * To simplify storing keys that are prefixes of each other, the + * end-of-string position is treated as an extra nibble value, ordered + * before all others. So there are 16 possible real nibble values, + * plus one value for nibbles past the end of the key. + */ +#define TWIDTH_BMP 17 + +/* + * We're constructing the layout of the branch `i` field in a careful + * way to avoid mistakes, getting the compiler to calculate values + * rather than typing them in by hand. + */ +enum { + TSHIFT_BRANCH = 0, + TSHIFT_COW, + TSHIFT_BMP, + TOP_BMP = TSHIFT_BMP + TWIDTH_BMP, + TSHIFT_INDEX = TOP_BMP, + TOP_INDEX = TSHIFT_INDEX + TWIDTH_INDEX, +}; + +typedef char static_assert_fields_fit_in_word + [TOP_INDEX <= sizeof(word) * CHAR_BIT ? 1 : -1]; + +typedef char static_assert_bmp_fits + [TOP_BMP <= sizeof(bitmap_t) * CHAR_BIT ? 1 : -1]; + +#define BIG1 ((word)1) +#define TMASK(width, shift) (((BIG1 << (width)) - BIG1) << (shift)) + +/*! \brief is this node a branch or a leaf? */ +#define TFLAG_BRANCH (BIG1 << TSHIFT_BRANCH) + +/*! \brief copy-on-write flag, used in both leaves and branches */ +#define TFLAG_COW (BIG1 << TSHIFT_COW) + +/*! \brief for extracting pointer to key */ +#define TMASK_LEAF (~(word)(TFLAG_BRANCH | TFLAG_COW)) + +/*! \brief mask for extracting nibble index */ +#define TMASK_INDEX TMASK(TWIDTH_INDEX, TSHIFT_INDEX) + +/*! \brief mask for extracting bitmap */ +#define TMASK_BMP TMASK(TWIDTH_BMP, TSHIFT_BMP) + +/*! \brief bitmap entry for NOBYTE */ +#define BMP_NOBYTE (BIG1 << TSHIFT_BMP) + +/*! \brief Initialize a new leaf, copying the key, and returning failure code. */ +static int mkleaf(node_t *leaf, const trie_key_t *key, uint32_t len, knot_mm_t *mm) +{ + if (unlikely((word)len > (BIG1 << KEYLENBITS))) + return KNOT_ENOMEM; + tkey_t *lkey = mm_alloc(mm, sizeof(tkey_t) + len); + if (unlikely(!lkey)) + return KNOT_ENOMEM; + lkey->cow = 0; + lkey->len = len; + memcpy(lkey->chars, key, len); + word i = (uintptr_t)lkey; + assert((i & TFLAG_BRANCH) == 0); + *leaf = (node_t){ .i = i, .p = NULL }; + return KNOT_EOK; +} + +/*! \brief construct a branch node */ +static node_t mkbranch(trie_index_t index, bitmap_t bmp, node_t *twigs) +{ + word i = TFLAG_BRANCH | bmp + | (index << TSHIFT_INDEX); + assert(index < TMAX_INDEX); + assert((bmp & ~TMASK_BMP) == 0); + return (node_t){ .i = i, .p = twigs }; +} + +/*! \brief Make an empty root node. */ +static node_t empty_root(void) +{ + return mkbranch(TMAX_INDEX-1, 0, NULL); +} + +/*! \brief Propagate error codes. */ +#define ERR_RETURN(x) \ + do { \ + int err_code_ = x; \ + if (unlikely(err_code_ != KNOT_EOK)) \ + return err_code_; \ + } while (false) + + +/*! \brief Test flags to determine type of this node. */ +static bool isbranch(const node_t *t) +{ + return t->i & TFLAG_BRANCH; +} + +static tkey_t *tkey(const node_t *t) +{ + assert(!isbranch(t)); + return (tkey_t *)(uintptr_t)(t->i & TMASK_LEAF); +} + +static trie_val_t *tvalp(node_t *t) +{ + assert(!isbranch(t)); + return &t->p; +} + +/*! \brief Given a branch node, return the index of the corresponding nibble in the key. */ +static trie_index_t branch_index(const node_t *t) +{ + assert(isbranch(t)); + return (t->i & TMASK_INDEX) >> TSHIFT_INDEX; +} + +static bitmap_t branch_bmp(const node_t *t) +{ + assert(isbranch(t)); + return (t->i & TMASK_BMP); +} + +/*! + * \brief Count the number of set bits. + * + * \TODO This implementation may be relatively slow on some HW. + */ +static uint branch_weight(const node_t *t) +{ + assert(isbranch(t)); + uint n = __builtin_popcount(t->i & TMASK_BMP); + assert(n > 1 && n <= TWIDTH_BMP); + return n; +} + +/*! \brief Compute offset of an existing child in a branch node. */ +static uint twigoff(const node_t *t, bitmap_t bit) +{ + assert(isbranch(t)); + assert(__builtin_popcount(bit) == 1); + return __builtin_popcount(t->i & TMASK_BMP & (bit - 1)); +} + +/*! \brief Extract a nibble from a key and turn it into a bitmask. */ +static bitmap_t keybit(trie_index_t ni, const trie_key_t *key, uint32_t len) +{ + trie_index_t bytei = ni >> 1; + + if (bytei >= len) + return BMP_NOBYTE; + + uint8_t ki = (uint8_t)key[bytei]; + uint nibble = (ni & 1) ? (ki & 0xf) : (ki >> 4); + + // skip one for NOBYTE nibbles after the end of the key + return BIG1 << (nibble + 1 + TSHIFT_BMP); +} + +/*! \brief Extract a nibble from a key and turn it into a bitmask. */ +static bitmap_t twigbit(const node_t *t, const trie_key_t *key, uint32_t len) +{ + assert(isbranch(t)); + return keybit(branch_index(t), key, len); +} + +/*! \brief Test if a branch node has a child indicated by a bitmask. */ +static bool hastwig(const node_t *t, bitmap_t bit) +{ + assert(isbranch(t)); + assert((bit & ~TMASK_BMP) == 0); + assert(__builtin_popcount(bit) == 1); + return t->i & bit; +} + +/*! \brief Get pointer to packed array of child nodes. */ +static node_t* twigs(node_t *t) +{ + assert(isbranch(t)); + return t->p; +} + +/*! \brief Get pointer to a particular child of a branch node. */ +static node_t* twig(node_t *t, uint i) +{ + assert(i < branch_weight(t)); + return twigs(t) + i; +} + +/*! \brief Get twig number of a child node TODO: better description. */ +static uint twig_number(node_t *child, node_t *parent) +{ + // twig array index using pointer arithmetic + ptrdiff_t num = child - twigs(parent); + assert(num >= 0 && num < branch_weight(parent)); + return (uint)num; +} + +/*! \brief Simple string comparator. */ +static int key_cmp(const trie_key_t *k1, uint32_t k1_len, + const trie_key_t *k2, uint32_t k2_len) +{ + int ret = memcmp(k1, k2, MIN(k1_len, k2_len)); + if (ret != 0) { + return ret; + } + + /* Key string is equal, compare lengths. */ + if (k1_len == k2_len) { + return 0; + } else if (k1_len < k2_len) { + return -1; + } else { + return 1; + } +} + +trie_t* trie_create(knot_mm_t *mm) +{ + trie_t *trie = mm_alloc(mm, sizeof(trie_t)); + if (trie != NULL) { + trie->root = empty_root(); + trie->weight = 0; + if (mm != NULL) + trie->mm = *mm; + else + mm_ctx_init(&trie->mm); + } + return trie; +} + +/*! \brief Free anything under the trie node, except for the passed pointer itself. */ +static void clear_trie(node_t *trie, knot_mm_t *mm) +{ + if (!isbranch(trie)) { + mm_free(mm, tkey(trie)); + } else { + uint n = branch_weight(trie); + for (uint i = 0; i < n; ++i) + clear_trie(twig(trie, i), mm); + mm_free(mm, twigs(trie)); + } +} + +void trie_free(trie_t *tbl) +{ + if (tbl == NULL) + return; + if (tbl->weight) + clear_trie(&tbl->root, &tbl->mm); + mm_free(&tbl->mm, tbl); +} + +void trie_clear(trie_t *tbl) +{ + assert(tbl); + if (!tbl->weight) + return; + clear_trie(&tbl->root, &tbl->mm); + tbl->root = empty_root(); + tbl->weight = 0; +} + +static bool dup_trie(node_t *copy, const node_t *orig, trie_dup_cb dup_cb, knot_mm_t *mm) +{ + if (isbranch(orig)) { + uint n = branch_weight(orig); + node_t *cotw = mm_alloc(mm, n * sizeof(*cotw)); + if (cotw == NULL) { + return NULL; + } + const node_t *ortw = twigs((node_t *)orig); + for (uint i = 0; i < n; ++i) { + if (!dup_trie(cotw + i, ortw + i, dup_cb, mm)) { + while (i-- > 0) { + clear_trie(cotw + i, mm); + } + mm_free(mm, cotw); + return false; + } + } + *copy = mkbranch(branch_index(orig), branch_bmp(orig), cotw); + } else { + tkey_t *key = tkey(orig); + if (mkleaf(copy, key->chars, key->len, mm) != KNOT_EOK) { + return false; + } + if ((copy->p = dup_cb(orig->p, mm)) == NULL) { + mm_free(mm, tkey(copy)); + return false; + } + } + return true; +} + +trie_t* trie_dup(const trie_t *orig, trie_dup_cb dup_cb, knot_mm_t *mm) +{ + if (orig == NULL) { + return NULL; + } + trie_t *copy = mm_alloc(mm, sizeof(*copy)); + if (copy == NULL) { + return NULL; + } + copy->weight = orig->weight; + if (mm != NULL) { + copy->mm = *mm; + } else { + mm_ctx_init(©->mm); + } + if (copy->weight) { + if (!dup_trie(©->root, &orig->root, dup_cb, mm)) { + mm_free(mm, copy); + return NULL; + } + } + return copy; +} + +size_t trie_weight(const trie_t *tbl) +{ + assert(tbl); + return tbl->weight; +} + +trie_val_t* trie_get_try(trie_t *tbl, const trie_key_t *key, uint32_t len) +{ + assert(tbl); + if (!tbl->weight) + return NULL; + node_t *t = &tbl->root; + while (isbranch(t)) { + __builtin_prefetch(twigs(t)); + bitmap_t b = twigbit(t, key, len); + if (!hastwig(t, b)) + return NULL; + t = twig(t, twigoff(t, b)); + } + tkey_t *lkey = tkey(t); + if (key_cmp(key, len, lkey->chars, lkey->len) != 0) + return NULL; + return tvalp(t); +} + +/* Optimization: the approach isn't ideal, as e.g. walking through the prefix + * is duplicated and we explicitly construct the wildcard key. Still, it's close + * to optimum which would be significantly more complicated and error-prone to write. */ +trie_val_t* trie_get_try_wildcard(trie_t *tbl, const trie_key_t *key, uint32_t len) +{ + assert(tbl); + if (!tbl->weight) + return NULL; + // Find leaf sharing the longest common prefix; see ns_find_branch() for explanation. + node_t *t = &tbl->root; + while (isbranch(t)) { + __builtin_prefetch(twigs(t)); + bitmap_t b = twigbit(t, key, len); + uint i = hastwig(t, b) ? twigoff(t, b) : 0; + t = twig(t, i); + } + const tkey_t * const lcp_key = tkey(t); + + // Find the last matching zero byte or -1 (source of synthesis) + int i_lmz = -1; + for (int i = 0; i < len && i < lcp_key->len && key[i] == lcp_key->chars[i]; ++i) { + if (key[i] == '\0' && i < len - 1) // do not count the terminating zero + i_lmz = i; + // Shortcut: we may have found an exact match. + if (i == len - 1 && len == lcp_key->len) + return tvalp(t); + } + if (len == 0) // The empty name needs separate handling. + return lcp_key->len == 0 ? tvalp(t) : NULL; + + // Construct the key of the wildcard we need and look it up. + const int wild_len = i_lmz + 3; + uint8_t wild_key[wild_len]; + memcpy(wild_key, key, wild_len - 2); + wild_key[wild_len - 2] = '*'; + wild_key[wild_len - 1] = '\0'; // LF is always 0-terminated ATM + return trie_get_try(tbl, wild_key, wild_len); +} + +/*! \brief Delete leaf t with parent p; b is the bit for t under p. + * Optionally return the deleted value via val. The function can't fail. */ +static void del_found(trie_t *tbl, node_t *t, node_t *p, bitmap_t b, trie_val_t *val) +{ + assert(!tkey(t)->cow); + mm_free(&tbl->mm, tkey(t)); + if (val != NULL) + *val = *tvalp(t); // we return trie_val_t directly when deleting + --tbl->weight; + if (unlikely(!p)) { // whole trie was a single leaf + assert(tbl->weight == 0); + tbl->root = empty_root(); + return; + } + // remove leaf t as child of p + node_t *tp = twigs(p); + uint ci = twig_number(t, p); + uint cc = branch_weight(p); // child count + + if (cc == 2) { + // collapse binary node p: move the other child to the parent + *p = tp[1 - ci]; + mm_free(&tbl->mm, tp); + return; + } + memmove(tp + ci, tp + ci + 1, sizeof(node_t) * (cc - ci - 1)); + p->i &= ~b; + node_t *newt = mm_realloc(&tbl->mm, tp, sizeof(node_t) * (cc - 1), + sizeof(node_t) * cc); + if (likely(newt != NULL)) + p->p = newt; + // We can ignore mm_realloc failure because an oversized twig + // array is OK - only beware that next time the prev_size + // passed to mm_realloc will not be correct; TODO? +} + +int trie_del(trie_t *tbl, const trie_key_t *key, uint32_t len, trie_val_t *val) +{ + assert(tbl); + if (!tbl->weight) + return KNOT_ENOENT; + node_t *t = &tbl->root; // current and parent node + node_t *p = NULL; + bitmap_t b = 0; + while (isbranch(t)) { + __builtin_prefetch(twigs(t)); + b = twigbit(t, key, len); + if (!hastwig(t, b)) + return KNOT_ENOENT; + p = t; + t = twig(t, twigoff(t, b)); + } + tkey_t *lkey = tkey(t); + if (key_cmp(key, len, lkey->chars, lkey->len) != 0) + return KNOT_ENOENT; + del_found(tbl, t, p, b, val); + return KNOT_EOK; +} + +/*! + * \brief Stack of nodes, storing a path down a trie. + * + * The structure also serves directly as the public trie_it_t type, + * in which case it always points to the current leaf, unless we've finished + * (i.e. it->len == 0). + * stack[0] is always a valid pointer to the root -> ns_gettrie() + */ +typedef struct trie_it { + node_t* *stack; /*!< The stack; malloc is used directly instead of mm. */ + uint32_t len; /*!< Current length of the stack. */ + uint32_t alen; /*!< Allocated/available length of the stack. */ + /*! \brief Initial storage for \a stack; it should fit in most use cases. */ + node_t* stack_init[250]; +} nstack_t; + +/*! \brief Create a node stack containing just the root (or empty). */ +static void ns_init(nstack_t *ns, trie_t *tbl) +{ + assert(tbl); + ns->stack = ns->stack_init; + ns->alen = sizeof(ns->stack_init) / sizeof(ns->stack_init[0]); + ns->stack[0] = &tbl->root; + ns->len = (tbl->weight > 0); +} + +static inline trie_t * ns_gettrie(nstack_t *ns) +{ + assert(ns && ns->stack && ns->stack[0]); + return (struct trie *)ns->stack[0]; +} + +/*! \brief Free inside of the stack, i.e. not the passed pointer itself. */ +static void ns_cleanup(nstack_t *ns) +{ + assert(ns && ns->stack); + if (likely(ns->stack == ns->stack_init)) + return; + free(ns->stack); + #ifndef NDEBUG + ns->stack = NULL; + ns->alen = 0; + #endif +} + +/*! \brief Allocate more space for the stack. */ +static int ns_longer_alloc(nstack_t *ns) +{ + ns->alen *= 2; + size_t new_size = ns->alen * sizeof(node_t *); + node_t **st; + if (ns->stack == ns->stack_init) { + st = malloc(new_size); + if (st != NULL) + memcpy(st, ns->stack, ns->len * sizeof(node_t *)); + } else { + st = realloc(ns->stack, new_size); + } + if (st == NULL) + return KNOT_ENOMEM; + ns->stack = st; + return KNOT_EOK; +} + +/*! \brief Ensure the node stack can be extended by one. */ +static inline int ns_longer(nstack_t *ns) +{ + // get a longer stack if needed + if (likely(ns->len < ns->alen)) + return KNOT_EOK; + return ns_longer_alloc(ns); // hand-split the part suitable for inlining +} + +/*! + * \brief Find the "branching point" as if searching for a key. + * + * The whole path to the point is kept on the passed stack; + * always at least the root will remain on the top of it. + * Beware: the precise semantics of this function is rather tricky. + * The top of the stack will contain: the corresponding leaf if exact + * match is found; or the immediate node below a + * branching-point-on-edge or the branching-point itself. + * + * \param idiff Set the index of first differing nibble, or TMAX_INDEX for an exact match + * \param tbit Set the bit of the closest leaf's nibble at index idiff + * \param kbit Set the bit of the key's nibble at index idiff + * + * \return KNOT_EOK or KNOT_ENOMEM. + */ +static int ns_find_branch(nstack_t *ns, const trie_key_t *key, uint32_t len, + trie_index_t *idiff, bitmap_t *tbit, bitmap_t *kbit) +{ + assert(ns && ns->len && idiff); + // First find some leaf with longest matching prefix. + while (isbranch(ns->stack[ns->len - 1])) { + ERR_RETURN(ns_longer(ns)); + node_t *t = ns->stack[ns->len - 1]; + __builtin_prefetch(twigs(t)); + bitmap_t b = twigbit(t, key, len); + // Even if our key is missing from this branch we need to + // keep iterating down to a leaf. It doesn't matter which + // twig we choose since the keys are all the same up to this + // index. Note that blindly using twigoff(t, b) can cause + // an out-of-bounds index if it equals twigmax(t). + uint i = hastwig(t, b) ? twigoff(t, b) : 0; + ns->stack[ns->len++] = twig(t, i); + } + tkey_t *lkey = tkey(ns->stack[ns->len-1]); + // Find index of the first char that differs. + size_t bytei = 0; + uint32_t klen = lkey->len; + for (bytei = 0; bytei < MIN(len,klen); bytei++) { + if (key[bytei] != lkey->chars[bytei]) + break; + } + // Find which half-byte has matched. + trie_index_t index = bytei << 1; + if (bytei == len && len == lkey->len) { // found equivalent key + index = TMAX_INDEX; + goto success; + } + if (likely(bytei < MIN(len,klen))) { + uint8_t k2 = (uint8_t)lkey->chars[bytei]; + uint8_t k1 = (uint8_t)key[bytei]; + if (((k1 ^ k2) & 0xf0) == 0) + index += 1; + } + // now go up the trie from the current leaf + node_t *t; + do { + if (unlikely(ns->len == 1)) + goto success; // only the root stays on the stack + t = ns->stack[ns->len - 2]; + if (branch_index(t) < index) + goto success; + --ns->len; + } while (true); +success: + #ifndef NDEBUG // invariants on successful return + assert(ns->len); + if (isbranch(ns->stack[ns->len - 1])) { + t = ns->stack[ns->len - 1]; + assert(branch_index(t) >= index); + } + if (ns->len > 1) { + t = ns->stack[ns->len - 2]; + assert(branch_index(t) < index || index == TMAX_INDEX); + } + #endif + *idiff = index; + *tbit = keybit(index, lkey->chars, lkey->len); + *kbit = keybit(index, key, len); + return KNOT_EOK; +} + +/*! + * \brief Advance the node stack to the last leaf in the subtree. + * + * \return KNOT_EOK or KNOT_ENOMEM. + */ +static int ns_last_leaf(nstack_t *ns) +{ + assert(ns); + do { + ERR_RETURN(ns_longer(ns)); + node_t *t = ns->stack[ns->len - 1]; + if (!isbranch(t)) + return KNOT_EOK; + uint lasti = branch_weight(t) - 1; + ns->stack[ns->len++] = twig(t, lasti); + } while (true); +} + +/*! + * \brief Advance the node stack to the first leaf in the subtree. + * + * \return KNOT_EOK or KNOT_ENOMEM. + */ +static int ns_first_leaf(nstack_t *ns) +{ + assert(ns && ns->len); + do { + ERR_RETURN(ns_longer(ns)); + node_t *t = ns->stack[ns->len - 1]; + if (!isbranch(t)) + return KNOT_EOK; + ns->stack[ns->len++] = twig(t, 0); + } while (true); +} + +/*! + * \brief Advance the node stack to the leaf that is previous to the current node. + * + * \note Prefix leaf under the current node DOES count (if present; perhaps questionable). + * \return KNOT_EOK on success, KNOT_ENOENT on not-found, or possibly KNOT_ENOMEM. + */ +static int ns_prev_leaf(nstack_t *ns) +{ + assert(ns && ns->len > 0); + + node_t *t = ns->stack[ns->len - 1]; + // Beware: BMP_NOBYTE child is ordered *before* its parent. + if (isbranch(t) && hastwig(t, BMP_NOBYTE)) { + ERR_RETURN(ns_longer(ns)); + ns->stack[ns->len++] = twig(t, 0); + return KNOT_EOK; + } + + for (; ns->len >= 2; --ns->len) { + t = ns->stack[ns->len - 1]; + node_t *p = ns->stack[ns->len - 2]; + uint ci = twig_number(t, p); + if (ci == 0) // we've got to go up again + continue; + // t isn't the first child -> go down the previous one + ns->stack[ns->len - 1] = twig(p, ci - 1); + return ns_last_leaf(ns); + } + return KNOT_ENOENT; // root without empty key has no previous leaf +} + +/*! + * \brief Advance the node stack to the leaf that is successor to the current node. + * + * \param skip_prefixed skip any nodes whose key is a prefix of the current one. + * If false, prefix leaf or anything else under the current node DOES count. + * \return KNOT_EOK on success, KNOT_ENOENT on not-found, or possibly KNOT_ENOMEM. + */ +static int ns_next_leaf(nstack_t *ns, const bool skip_pefixed) +{ + assert(ns && ns->len > 0); + + node_t *t = ns->stack[ns->len - 1]; + if (!skip_pefixed && isbranch(t)) + return ns_first_leaf(ns); + for (; ns->len >= 2; --ns->len) { + t = ns->stack[ns->len - 1]; + node_t *p = ns->stack[ns->len - 2]; + uint ci = twig_number(t, p); + if (skip_pefixed && ci == 0 && hastwig(t, BMP_NOBYTE)) { + // Keys in the subtree of p are suffixes of the key of t, + // so we've got to go one level higher + // (this can't happen more than once) + continue; + } + uint cc = branch_weight(p); + assert(ci + 1 <= cc); + if (ci + 1 == cc) { + // t is the last child of p, so we need to keep climbing + continue; + } + // go down the next child of p + ns->stack[ns->len - 1] = twig(p, ci + 1); + return ns_first_leaf(ns); + } + return KNOT_ENOENT; // not found, as no more parent is available +} + +/*! \brief Advance the node stack to leaf with longest prefix of the current key. */ +static int ns_prefix(nstack_t *ns) +{ + assert(ns && ns->len > 0); + const node_t *start = ns->stack[ns->len - 1]; + // Walk up the trie until we find a BMP_NOBYTE child. + while (--ns->len > 0) { + node_t *p = ns->stack[ns->len - 1]; + if (!hastwig(p, BMP_NOBYTE)) + continue; + node_t *end = twig(p, 0); + // In case we started in a BMP_NOBYTE leaf, the first step up + // did NOT shorten the key and we would get back into the same + // node again. + if (end == start) + continue; + ns->stack[ns->len++] = end; + return KNOT_EOK; + } + return KNOT_ENOENT; // not found, as no more parent is available +} + +/*! \brief less-or-equal search. + * + * \return KNOT_EOK for exact match, 1 for previous, KNOT_ENOENT for not-found, + * or KNOT_E*. + */ +static int ns_get_leq(nstack_t *ns, const trie_key_t *key, uint32_t len) +{ + // First find the key with longest-matching prefix + trie_index_t idiff; + bitmap_t tbit, kbit; + ERR_RETURN(ns_find_branch(ns, key, len, &idiff, &tbit, &kbit)); + node_t *t = ns->stack[ns->len - 1]; + if (idiff == TMAX_INDEX) // found exact match + return KNOT_EOK; + // Get t: the last node on matching path + bitmap_t b; + if (isbranch(t) && branch_index(t) == idiff) { + // t is OK + b = kbit; + } else { + // the top of the stack was the first unmatched node -> step up + if (ns->len == 1) { + // root was unmatched already + if (kbit < tbit) + return KNOT_ENOENT; + ERR_RETURN(ns_last_leaf(ns)); + return 1; + } + --ns->len; + t = ns->stack[ns->len - 1]; + b = twigbit(t, key, len); + } + // Now we re-do the first "non-matching" step in the trie + // but try the previous child if key was less (it may not exist) + int i = hastwig(t, b) + ? (int)twigoff(t, b) - (kbit < tbit) + : (int)twigoff(t, b) - 1 /* twigoff returns successor when !hastwig */; + if (i >= 0) { + ERR_RETURN(ns_longer(ns)); + ns->stack[ns->len++] = twig(t, i); + ERR_RETURN(ns_last_leaf(ns)); + } else { + ERR_RETURN(ns_prev_leaf(ns)); + } + return 1; +} + +int trie_get_leq(trie_t *tbl, const trie_key_t *key, uint32_t len, trie_val_t **val) +{ + assert(tbl && val); + if (tbl->weight == 0) { + if (val) *val = NULL; + return KNOT_ENOENT; + } + // We try to do without malloc. + nstack_t ns_local; + ns_init(&ns_local, tbl); + nstack_t *ns = &ns_local; + + int ret = ns_get_leq(ns, key, len); + if (ret == KNOT_EOK || ret == 1) { + assert(!isbranch(ns->stack[ns->len - 1])); + if (val) *val = tvalp(ns->stack[ns->len - 1]); + } else { + if (val) *val = NULL; + } + ns_cleanup(ns); + return ret; +} + +int trie_it_get_leq(trie_it_t *it, const trie_key_t *key, uint32_t len) +{ + assert(it && it->stack[0] && it->alen); + const trie_t *tbl = ns_gettrie(it); + if (tbl->weight == 0) { + it->len = 0; + return KNOT_ENOENT; + } + it->len = 1; + int ret = ns_get_leq(it, key, len); + if (ret == KNOT_EOK || ret == 1) { + assert(trie_it_key(it, NULL)); + } else { + it->len = 0; + } + return ret; +} + +/* see below */ +static int cow_pushdown(trie_cow_t *cow, nstack_t *ns); + +/*! \brief implementation of trie_get_ins() and trie_get_cow() */ +static trie_val_t* cow_get_ins(trie_cow_t *cow, trie_t *tbl, + const trie_key_t *key, uint32_t len) +{ + assert(tbl); + // First leaf in an empty tbl? + if (unlikely(!tbl->weight)) { + if (unlikely(mkleaf(&tbl->root, key, len, &tbl->mm))) + return NULL; + ++tbl->weight; + return tvalp(&tbl->root); + } + { // Intentionally un-indented; until end of function, to bound cleanup attr. + // Find the branching-point + __attribute__((cleanup(ns_cleanup))) + nstack_t ns_local; + ns_init(&ns_local, tbl); + nstack_t *ns = &ns_local; + trie_index_t idiff; + bitmap_t tbit, kbit; + if (unlikely(ns_find_branch(ns, key, len, &idiff, &tbit, &kbit))) + return NULL; + if (unlikely(cow && cow_pushdown(cow, ns) != KNOT_EOK)) + return NULL; + node_t *t = ns->stack[ns->len - 1]; + if (idiff == TMAX_INDEX) // the same key was already present + return tvalp(t); + node_t leaf, *leafp; + if (unlikely(mkleaf(&leaf, key, len, &tbl->mm))) + return NULL; + + if (isbranch(t) && branch_index(t) == idiff) { + // The node t needs a new leaf child. + assert(!hastwig(t, kbit)); + // new child position and original child count + uint s = twigoff(t, kbit); + uint m = branch_weight(t); + node_t *nt = mm_realloc(&tbl->mm, twigs(t), + sizeof(node_t) * (m + 1), sizeof(node_t) * m); + if (unlikely(!nt)) + goto err_leaf; + memmove(nt + s + 1, nt + s, sizeof(node_t) * (m - s)); + leafp = nt + s; + *t = mkbranch(idiff, branch_bmp(t) | kbit, nt); + } else { + // We need to insert a new binary branch with leaf at *t. + // Note: it works the same for the case where we insert above root t. + #ifndef NDEBUG + if (ns->len > 1) { + node_t *pt = ns->stack[ns->len - 2]; + assert(hastwig(pt, twigbit(pt, key, len))); + } + #endif + node_t *nt = mm_alloc(&tbl->mm, sizeof(node_t) * 2); + if (unlikely(!nt)) + goto err_leaf; + node_t t2 = *t; // Save before overwriting t. + *t = mkbranch(idiff, tbit | kbit, nt); + *twig(t, twigoff(t, tbit)) = t2; + leafp = twig(t, twigoff(t, kbit)); + }; + *leafp = leaf; + ++tbl->weight; + return tvalp(leafp); +err_leaf: + mm_free(&tbl->mm, tkey(&leaf)); + return NULL; + } +} + +trie_val_t* trie_get_ins(trie_t *tbl, const trie_key_t *key, uint32_t len) +{ + return cow_get_ins(NULL, tbl, key, len); +} + +/*! \brief Apply a function to every trie_val_t*, in order; a recursive solution. */ +static int apply_nodes(node_t *t, int (*f)(trie_val_t *, void *), void *d) +{ + assert(t); + if (!isbranch(t)) + return f(tvalp(t), d); + uint n = branch_weight(t); + for (uint i = 0; i < n; ++i) + ERR_RETURN(apply_nodes(twig(t, i), f, d)); + return KNOT_EOK; +} + +int trie_apply(trie_t *tbl, int (*f)(trie_val_t *, void *), void *d) +{ + assert(tbl && f); + if (!tbl->weight) + return KNOT_EOK; + return apply_nodes(&tbl->root, f, d); +} + +/* These are all thin wrappers around static Tns* functions. */ +trie_it_t* trie_it_begin(trie_t *tbl) +{ + assert(tbl); + trie_it_t *it = malloc(sizeof(nstack_t)); + if (!it) + return NULL; + ns_init(it, tbl); + if (it->len == 0) // empty tbl + return it; + if (ns_first_leaf(it)) { + ns_cleanup(it); + free(it); + return NULL; + } + return it; +} + +bool trie_it_finished(trie_it_t *it) +{ + assert(it); + return it->len == 0; +} + +void trie_it_free(trie_it_t *it) +{ + if (!it) + return; + ns_cleanup(it); + free(it); +} + +trie_it_t *trie_it_clone(const trie_it_t *it) +{ + if (!it) // TODO: or should that be an assertion? + return NULL; + trie_it_t *it2 = malloc(sizeof(nstack_t)); + if (!it2) + return NULL; + it2->len = it->len; + it2->alen = it->alen; // we _might_ change it in the rare malloc case, but... + if (likely(it->stack == it->stack_init)) { + it2->stack = it2->stack_init; + assert(it->alen == sizeof(it->stack_init) / sizeof(it->stack_init[0])); + } else { + it2->stack = malloc(it2->alen * sizeof(it2->stack[0])); + if (!it2->stack) { + free(it2); + return NULL; + } + } + memcpy(it2->stack, it->stack, it->len * sizeof(it->stack[0])); + return it2; +} + +const trie_key_t* trie_it_key(trie_it_t *it, size_t *len) +{ + assert(it && it->len); + node_t *t = it->stack[it->len - 1]; + assert(!isbranch(t)); + tkey_t *key = tkey(t); + if (len) + *len = key->len; + return key->chars; +} + +trie_val_t* trie_it_val(trie_it_t *it) +{ + assert(it && it->len); + node_t *t = it->stack[it->len - 1]; + assert(!isbranch(t)); + return tvalp(t); +} + +void trie_it_next(trie_it_t *it) +{ + assert(it && it->len); + if (ns_next_leaf(it, false) != KNOT_EOK) + it->len = 0; +} + +void trie_it_next_loop(trie_it_t *it) +{ + assert(it && it->len); + int ret = ns_next_leaf(it, false); + if (ret == KNOT_ENOENT) { + it->len = 1; + ret = ns_first_leaf(it); + } + if (ret) + it->len = 0; +} + +void trie_it_next_nosuffix(trie_it_t *it) +{ + assert(it && it->len); + if (ns_next_leaf(it, true) != KNOT_EOK) + it->len = 0; +} + +void trie_it_prev(trie_it_t *it) +{ + assert(it && it->len); + if (ns_prev_leaf(it) != KNOT_EOK) + it->len = 0; +} + +void trie_it_prev_loop(trie_it_t *it) +{ + assert(it && it->len); + int ret = ns_prev_leaf(it); + if (ret == KNOT_ENOENT) { + it->len = 1; + ret = ns_last_leaf(it); + } + if (ret) + it->len = 0; +} + +void trie_it_parent(trie_it_t *it) +{ + assert(it && it->len); + if (ns_prefix(it)) + it->len = 0; +} + +void trie_it_del(trie_it_t *it) +{ + assert(it && it->len); + if (it->len == 0) + return; + node_t *t = it->stack[it->len - 1]; + assert(!isbranch(t)); + bitmap_t b; // del_found() needs to know which bit to zero in the bitmap + node_t *p; + if (it->len == 1) { // deleting the root + p = NULL; + b = 0; // unused + } else { + p = it->stack[it->len - 2]; + assert(isbranch(p)); + size_t len; + const trie_key_t *key = trie_it_key(it, &len); + b = twigbit(p, key, len); + } + // We could trie_it_{next,prev,...}(it) now, in case we wanted that semantics. + it->len = 0; + del_found(ns_gettrie(it), t, p, b, NULL); +} + + +/*!\file + * + * \section About copy-on-write + * + * In these notes I'll use the term "object" to refer to either the + * twig array of a branch, or the application's data that is referred + * to by a leaf's trie_val_t pointer. Note that for COW we don't care + * about trie node_t structs themselves, but the objects that they + * point to. + * + * \subsection COW states + * + * During a COW transaction an object can be in one of three states: + * shared, only in the old trie, or only in the new trie. When a + * transaction is rolled back, the only-new objects are freed; when a + * transaction is committed the new trie takes the place of the old + * one and only-old objects are freed. + * + * \subsection branch marks and regions + * + * A branch object can be marked by setting the COW flag in the first + * element of its twig array. Marked branches partition the trie into + * regions; an object's state depends on its region. + * + * The unmarked branch objects between a trie's root and the marked + * branches (excluding the marked branches themselves) is exclusively + * owned: either old-only (if you started from the old root) or + * new-only (if you started from the new root). + * + * Marked branch objects, and all objects reachable from marked branch + * objects, are in the shared region accessible from both old and new + * roots. All branch objects below a marked branch must be unmarked. + * (That is, there is at most one marked branch object on any path + * from the root of a trie.) + * + * Branch nodes in the new-only region can be modified in place, in + * the same way as an original qp trie. Branch nodes in the old-only + * or shared regions must not be modified. + * + * \subsection app object states + * + * The app objects reachable from the new-only and old-only regions + * explicitly record their state in a way determined by the + * application. (These app objects are reachable from the old and new + * roots by traversing only unmarked branch objects.) + * + * The app objects reachable from marked branch objects are implicitly + * shared, but their state field has an indeterminate value. If an app + * object was previously touched by a rolled-back transaction it may + * be marked shared or old-only; if it was previously touched by a + * committed transaction it may be marked shared or new-only. + * + * \subsection key states + * + * The memory allocated for tkey_t objects also needs to track its + * sharing state. They have a "cow" flag to mark when they are shared. + * Keys are relatively lazily copied (to make them exclusive) when + * their leaf node is touched by a COW mutation. + * + * [An alternative technique might be to copy them more eagerly, in + * cow_pushdown(), which would avoid the need for a flag bit at the + * cost of more allocator churn in a transaction.] + * + * \subsection outside COW + * + * When a COW transaction is not in progress, there are no marked + * branch objects, so everything is exclusively owned. When a COW + * transaction is finished (committed or rolled back), the branch + * marks are removed. Since they are in the shared region, this branch + * cleanup is visible to both old and new tries. + * + * However the state of app objects is not clean between COW + * transactions. When a COW transaction is committed, we traverse the + * old-only region to find old-only app objects that should be freed + * (and vice versa for rollback). In general, there will be app + * objects that are only reachable from the new-only region, and that + * have a mixture of shared and new states. + */ + +/*! \brief Trie copy-on-write state */ +struct trie_cow { + trie_t *old; + trie_t *new; + trie_cb *mark_shared; + void *d; +}; + +/*! \brief is this a marked branch object */ +static bool cow_marked(node_t *t) +{ + return isbranch(t) && (twigs(t)->i & TFLAG_COW); +} + +/*! \brief is this a leaf with a marked key */ +static bool cow_key(node_t *t) +{ + return !isbranch(t) && tkey(t)->cow; +} + +/*! \brief remove mark from a branch object */ +static void clear_cow(node_t *t) +{ + assert(isbranch(t)); + twigs(t)->i &= ~TFLAG_COW; +} + +/*! \brief mark a node as shared + * + * For branches this marks the twig array (in COW terminology, the + * branch object); for leaves it uses the callback to mark the app + * object. + */ +static void mark_cow(trie_cow_t *cow, node_t *t) +{ + if (isbranch(t)) { + node_t *object = twigs(t); + object->i |= TFLAG_COW; + } else { + tkey_t *lkey = tkey(t); + lkey->cow = 1; + if (cow->mark_shared != NULL) { + trie_val_t *valp = tvalp(t); + cow->mark_shared(*valp, lkey->chars, lkey->len, cow->d); + } + } +} + +/*! \brief push exclusive COW region down one node */ +static int cow_pushdown_one(trie_cow_t *cow, node_t *t) +{ + uint cc = branch_weight(t); + node_t *nt = mm_alloc(&cow->new->mm, sizeof(node_t) * cc); + if (nt == NULL) + return KNOT_ENOMEM; + /* mark all the children */ + for (uint ci = 0; ci < cc; ++ci) + mark_cow(cow, twig(t, ci)); + /* this node must be unmarked in both old and new versions */ + clear_cow(t); + t->p = memcpy(nt, twigs(t), sizeof(node_t) * cc); + return KNOT_EOK; +} + +/*! \brief push exclusive COW region to cover a whole node stack */ +static int cow_pushdown(trie_cow_t *cow, nstack_t *ns) +{ + node_t *new_twigs = NULL; + node_t *old_twigs = NULL; + for (uint i = 0; i < ns->len; i++) { + /* if we did a pushdown on the previous iteration, we + need to update this stack entry so it points into + the parent's new twigs instead of the old ones */ + if (new_twigs != old_twigs) + ns->stack[i] = new_twigs + (ns->stack[i] - old_twigs); + if (cow_marked(ns->stack[i])) { + old_twigs = twigs(ns->stack[i]); + if (cow_pushdown_one(cow, ns->stack[i])) + return KNOT_ENOMEM; + new_twigs = twigs(ns->stack[i]); + } else { + new_twigs = NULL; + old_twigs = NULL; + /* ensure key is exclusively owned */ + if (cow_key(ns->stack[i])) { + node_t oleaf = *ns->stack[i]; + tkey_t *okey = tkey(&oleaf); + if(mkleaf(ns->stack[i], okey->chars, okey->len, + &cow->new->mm)) + return KNOT_ENOMEM; + ns->stack[i]->p = oleaf.p; + okey->cow = 0; + } + } + } + return KNOT_EOK; +} + +trie_cow_t* trie_cow(trie_t *old, trie_cb *mark_shared, void *d) +{ + knot_mm_t *mm = &old->mm; + trie_t *new = mm_alloc(mm, sizeof(trie_t)); + trie_cow_t *cow = mm_alloc(mm, sizeof(trie_cow_t)); + if (new == NULL || cow == NULL) { + mm_free(mm, new); + mm_free(mm, cow); + return NULL; + } + new->mm = old->mm; + new->root = old->root; + new->weight = old->weight; + cow->old = old; + cow->new = new; + cow->mark_shared = mark_shared; + cow->d = d; + if (old->weight) + mark_cow(cow, &old->root); + return cow; +} + +trie_t* trie_cow_new(trie_cow_t *cow) +{ + assert(cow != NULL); + return cow->new; +} + +trie_val_t* trie_get_cow(trie_cow_t *cow, const trie_key_t *key, uint32_t len) +{ + return cow_get_ins(cow, cow->new, key, len); +} + +int trie_del_cow(trie_cow_t *cow, const trie_key_t *key, uint32_t len, trie_val_t *val) +{ + trie_t *tbl = cow->new; + if (unlikely(!tbl->weight)) + return KNOT_ENOENT; + { // Intentionally un-indented; until end of function, to bound cleanup attr. + // Find the branching-point + __attribute__((cleanup(ns_cleanup))) + nstack_t ns_local; + ns_init(&ns_local, tbl); + nstack_t *ns = &ns_local; + trie_index_t idiff; + bitmap_t tbit, kbit; + ERR_RETURN(ns_find_branch(ns, key, len, &idiff, &tbit, &kbit)); + if (idiff != TMAX_INDEX) + return KNOT_ENOENT; + ERR_RETURN(cow_pushdown(cow, ns)); + node_t *t = ns->stack[ns->len - 1]; + node_t *p = ns->len >= 2 ? ns->stack[ns->len - 2] : NULL; + del_found(tbl, t, p, p ? twigbit(p, key, len) : 0, val); + } + return KNOT_EOK; +} + +/*! \brief clean up after a COW transaction, recursively */ +static void cow_cleanup(trie_cow_t *cow, node_t *t, trie_cb *cb, void *d) +{ + if (cow_marked(t)) { + // we have hit the shared region, so just reset the mark + clear_cow(t); + return; + } else if (isbranch(t)) { + // traverse and free the exclusive region + uint cc = branch_weight(t); + for (uint ci = 0; ci < cc; ++ci) + cow_cleanup(cow, twig(t, ci), cb, d); + mm_free(&cow->new->mm, twigs(t)); + return; + } else { + // application must decide how to clean up its values + tkey_t *lkey = tkey(t); + if (cb != NULL) { + trie_val_t *valp = tvalp(t); + cb(*valp, lkey->chars, lkey->len, d); + } + // clean up exclusively-owned keys + if (lkey->cow) + lkey->cow = 0; + else + mm_free(&cow->new->mm, lkey); + return; + } +} + +trie_t* trie_cow_commit(trie_cow_t *cow, trie_cb *cb, void *d) +{ + trie_t *ret = cow->new; + if (cow->old->weight) + cow_cleanup(cow, &cow->old->root, cb, d); + mm_free(&ret->mm, cow->old); + mm_free(&ret->mm, cow); + return ret; +} + +trie_t* trie_cow_rollback(trie_cow_t *cow, trie_cb *cb, void *d) +{ + trie_t *ret = cow->old; + if (cow->new->weight) + cow_cleanup(cow, &cow->new->root, cb, d); + mm_free(&ret->mm, cow->new); + mm_free(&ret->mm, cow); + return ret; +} diff --git a/src/contrib/qp-trie/trie.h b/src/contrib/qp-trie/trie.h new file mode 100644 index 0000000..d479d3e --- /dev/null +++ b/src/contrib/qp-trie/trie.h @@ -0,0 +1,280 @@ +/* Copyright (C) 2019 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + Copyright (C) 2018 Tony Finch <dot@dotat.at> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#pragma once + +#include <stdbool.h> +#include <stdint.h> + +#include "libknot/mm_ctx.h" + +/*! + * \brief Native API of QP-tries: + * + * - keys are uint8_t strings, not necessarily zero-terminated, + * the structure copies the contents of the passed keys + * - values are void* pointers, typically you get an ephemeral pointer to it + * - key lengths are limited by 2^32-1 ATM + */ + +/*! \brief Element value. */ +typedef void* trie_val_t; +/*! \brief Key for indexing tries. Sign could be flipped easily. */ +typedef uint8_t trie_key_t; + +/*! \brief Opaque structure holding a QP-trie. */ +typedef struct trie trie_t; + +/*! \brief Opaque type for holding a QP-trie iterator. */ +typedef struct trie_it trie_it_t; + +/*! \brief Callback for cloning trie values. */ +typedef trie_val_t (*trie_dup_cb)(const trie_val_t val, knot_mm_t *mm); + +/*! \brief Callback for performing actions on a trie leaf + * + * Used during copy-on-write transactions + * + * \param val The value of the element to be altered + * \param key The key of the element to be altered + * \param len The length of key + * \param d Additional user data + */ +typedef void trie_cb(trie_val_t val, const trie_key_t *key, size_t len, void *d); + +/*! \brief Opaque type for holding the copy-on-write state for a QP-trie. */ +typedef struct trie_cow trie_cow_t; + +/*! \brief Create a trie instance. */ +trie_t* trie_create(knot_mm_t *mm); + +/*! \brief Free a trie instance. */ +void trie_free(trie_t *tbl); + +/*! \brief Clear a trie instance (make it empty). */ +void trie_clear(trie_t *tbl); + +/*! \brief Create a clone of existing trie. */ +trie_t* trie_dup(const trie_t *orig, trie_dup_cb dup_cb, knot_mm_t *mm); + +/*! \brief Return the number of keys in the trie. */ +size_t trie_weight(const trie_t *tbl); + +/*! \brief Search the trie, returning NULL on failure. */ +trie_val_t* trie_get_try(trie_t *tbl, const trie_key_t *key, uint32_t len); + +/*! \brief Search the trie including DNS wildcard semantics, returning NULL on failure. + * + * \note We assume the key is in knot_dname_lf() format, i.e. labels are ordered + * from root to leaf and separated by zero bytes (and no other zeros are allowed). + * \note Beware that DNS wildcard matching is not exactly what normal people would expect. + */ +trie_val_t* trie_get_try_wildcard(trie_t *tbl, const trie_key_t *key, uint32_t len); + +/*! \brief Search the trie, inserting NULL trie_val_t on failure. */ +trie_val_t* trie_get_ins(trie_t *tbl, const trie_key_t *key, uint32_t len); + +/*! + * \brief Search for less-or-equal element. + * + * \param tbl Trie. + * \param key Searched key. + * \param len Key length. + * \param val (optional) Value found; it will be set to NULL if not found or errored. + * \return KNOT_EOK for exact match, 1 for previous, KNOT_ENOENT for not-found, + * or KNOT_E*. + */ +int trie_get_leq(trie_t *tbl, const trie_key_t *key, uint32_t len, trie_val_t **val); + +/*! + * \brief Apply a function to every trie_val_t, in order. + * + * \return KNOT_EOK if success or KNOT_E* if error. + */ +int trie_apply(trie_t *tbl, int (*f)(trie_val_t *, void *), void *d); + +/*! + * \brief Remove an item, returning KNOT_EOK if succeeded or KNOT_ENOENT if not found. + * + * If val!=NULL and deletion succeeded, the deleted value is set. + */ +int trie_del(trie_t *tbl, const trie_key_t *key, uint32_t len, trie_val_t *val); + + +/*! \brief Create a new iterator pointing to the first element (if any). + * + * trie_it_* functions deal with these iterators capable of walking and jumping + * over the trie. Note that any modification to key-set stored by the trie + * will in general invalidate all iterators and you will need to begin anew. + * (It won't be detected - you may end up reading freed memory, etc.) + */ +trie_it_t* trie_it_begin(trie_t *tbl); + +/*! \brief Test if the iterator has gone "past the end" (and points nowhere). */ +bool trie_it_finished(trie_it_t *it); + +/*! \brief Free any resources of the iterator. It's OK to call it on NULL. */ +void trie_it_free(trie_it_t *it); + +/*! \brief Copy the iterator. See the warning in trie_it_begin(). */ +trie_it_t *trie_it_clone(const trie_it_t *it); + +/*! + * \brief Return pointer to the key of the current element. + * + * \note The len is uint32_t internally but size_t is better for our usage + * as it is without an additional type conversion. + */ +const trie_key_t* trie_it_key(trie_it_t *it, size_t *len); + +/*! \brief Return pointer to the value of the current element (writable). */ +trie_val_t* trie_it_val(trie_it_t *it); + +/*! + * \brief Advance the iterator to the next element. + * + * Iteration is in ascending lexicographical order. + * In particular, the empty string would be considered as the very first. + * + * \TODO: in most iterator operations, ENOMEM is very unlikely + * but it leads to a _finished() iterator (silently). + * Perhaps the functions should simply return KNOT_E* + */ +void trie_it_next(trie_it_t *it); +/*! \brief Advance the iterator to the previous element. See trie_it_next(). */ +void trie_it_prev(trie_it_t *it); + +/*! \brief Advance iterator to the next element, looping to first after last. */ +void trie_it_next_loop(trie_it_t *it); +/*! \brief Advance iterator to the previous element, looping to last after first. */ +void trie_it_prev_loop(trie_it_t *it); + +/*! \brief Advance iterator to the next element while ignoring the subtree. + * + * \note Another formulation: skip keys that are prefixed by the current key. + * \TODO: name, maybe _unprefixed? The thing is that in the "subtree" meaning + * doesn't correspond to how the pointers go in the implementation, + * but we may not care much for implementation in the API... + */ +void trie_it_next_nosub(trie_it_t *it); + +/*! \brief Advance iterator to the longest prefix of the current key. + * + * \TODO: name, maybe _prefix? Arguments similar to _nosub vs. _unprefixed. + */ +void trie_it_parent(trie_it_t *it); + +/*! \brief trie_get_leq() but with an iterator. */ +int trie_it_get_leq(trie_it_t *it, const trie_key_t *key, uint32_t len); + +/*! \brief Remove the current element. The iterator will get trie_it_finished() */ +void trie_it_del(trie_it_t *it); + + +/*! \brief Start a COW transaction + * + * A copy-on-write transaction starts by obtaining a write lock (in + * your application code) followed by a call to trie_cow(). This + * creates a shared clone of the trie and saves both old and new roots + * in the COW context. + * + * During the COW transaction, you call trie_cow_ins() or + * trie_cow_del() as necessary. These calls ensure that the relevant + * parts of the (new) trie are copied so that they can be modified + * freely. + * + * Your trie_val_t objects must be able to distinguish their + * reachability, either shared, or old-only, or new-only. Before a COW + * transaction the reachability of your objects is indeterminate. + * During a transaction, any trie_val_t objects that might be affected + * (because they are adjacent to a trie_get_cow() or trie_del_cow()) + * are first marked as shared using the callback you pass to + * trie_cow(). + * + * When the transaction is complete, to commit, call trie_cow_new() to + * get the new root, swap the old and new trie roots (e.g. with + * rcu_xchg_pointer()), wait for readers to finish with the old trie + * (e.g. using synchronize_rcu()), then call trie_cow_commit(). For a + * rollback, you can just call trie_cow_rollback() without waiting + * since that doesn't conflict with readers. After trie_cow_commit() + * or trie_cow_rollback() have finished, you can release your write + * lock. + * + * Concurrent reading of the old trie is allowed during a transaction + * provided that it is known when all readers have finished with the + * old version, e.g. using rcu_read_lock() and rcu_read_unlock(). + * There must be only one write transaction at a time. + * + * \param old the old trie + * \param mark_shared callback to mark a leaf as shared (can be NULL) + * \param d extra data for the callback + * \return a pointer to a COW context, + * or NULL if there was a failure + */ +trie_cow_t* trie_cow(trie_t *old, trie_cb *mark_shared, void *d); + +/*! \brief get the new trie from a COW context */ +trie_t* trie_cow_new(trie_cow_t *cow); + +/*! \brief variant of trie_get_ins() for use during COW transactions + * + * As necessary, this copies path from the root of the trie to the + * leaf, so that it is no longer shared. Any leaves adjacent to this + * path are marked as shared using the mark_shared callback passed to + * trie_cow(). + * + * It is your responsibility to COW your trie_val_t objects. If you copy an + * object you must change the original's reachability from shared to old-only. + * New objects (including copies) must have new-only reachability. + */ +trie_val_t* trie_get_cow(trie_cow_t *cow, const trie_key_t *key, uint32_t len); + +/*! + * \brief variant of trie_del() for use during COW transactions + * + * The mark_shared callback is invoked as necessary, in the same way + * as trie_get_cow(). + * + * Returns KNOT_EOK if the key was removed or KNOT_ENOENT if not found. + * If val!=NULL and deletion succeeded, the *val is set to the deleted + * value pointer. + */ +int trie_del_cow(trie_cow_t *cow, const trie_key_t *key, uint32_t len, trie_val_t *val); + +/*! \brief clean up the old trie after committing a COW transaction + * + * Your callback is invoked for any trie_val_t objects that might need + * cleaning up; you must free any objects you have marked as old-only + * and retain objects with shared reachability. + * + * \note The callback can be NULL. + * + * The cow object is free()d, and the new trie root is returned. + */ +trie_t* trie_cow_commit(trie_cow_t *cow, trie_cb *cb, void *d); + +/*! \brief clean up the new trie after rolling back a COW transaction + * + * Your callback is invoked for any trie_val_t objects that might need + * cleaning up; you must free any objects you have marked as new-only + * and retain objects with shared reachability. + * + * \note The callback can be NULL. + * + * The cow object is free()d, and the old trie root is returned. + */ +trie_t* trie_cow_rollback(trie_cow_t *cow, trie_cb *cb, void *d); diff --git a/src/contrib/semaphore.c b/src/contrib/semaphore.c new file mode 100644 index 0000000..ad50dcc --- /dev/null +++ b/src/contrib/semaphore.c @@ -0,0 +1,80 @@ +/* Copyright (C) 2019 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include "semaphore.h" + +#include <assert.h> +#include <stdlib.h> + +#if defined(__APPLE__) +#pragma clang diagnostic ignored "-Wdeprecated-declarations" +#endif + +void knot_sem_init(knot_sem_t *sem, unsigned int value) +{ + int ret = sem_init(&sem->semaphore, 1, value); + if (ret == 0) { + sem->status = -1; + } else { + sem->status = value; + sem->status_lock = malloc(sizeof(*sem->status_lock)); + pthread_mutex_init(&sem->status_lock->mutex, NULL); + pthread_cond_init(&sem->status_lock->cond, NULL); + } +} + +void knot_sem_wait(knot_sem_t *sem) +{ + if (sem->status < 0) { + int semret; + do { + semret = sem_wait(&sem->semaphore); + } while (semret != 0); // repeat wait as it might be interrupted by a signal + } else { + pthread_mutex_lock(&sem->status_lock->mutex); + while (sem->status == 0) { + pthread_cond_wait(&sem->status_lock->cond, &sem->status_lock->mutex); + } + sem->status--; + pthread_mutex_unlock(&sem->status_lock->mutex); + } +} + +void knot_sem_post(knot_sem_t *sem) +{ + if (sem->status < 0) { + int semret = sem_post(&sem->semaphore); + (void)semret; + assert(semret == 0); + } else { + pthread_mutex_lock(&sem->status_lock->mutex); + sem->status++; + pthread_cond_signal(&sem->status_lock->cond); + pthread_mutex_unlock(&sem->status_lock->mutex); + } +} + +void knot_sem_destroy(knot_sem_t *sem) +{ + knot_sem_wait(sem); + if (sem->status < 0) { + sem_destroy(&sem->semaphore); + } else { + pthread_cond_destroy(&sem->status_lock->cond); + pthread_mutex_destroy(&sem->status_lock->mutex); + free(sem->status_lock); + } +} diff --git a/src/contrib/semaphore.h b/src/contrib/semaphore.h new file mode 100644 index 0000000..be49b7f --- /dev/null +++ b/src/contrib/semaphore.h @@ -0,0 +1,41 @@ +/* Copyright (C) 2019 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <pthread.h> +#include <semaphore.h> + +#pragma once + +typedef struct { + pthread_mutex_t mutex; + pthread_cond_t cond; +} knot_sem_mutex_t; + +typedef struct { + int status; + union { + sem_t semaphore; + knot_sem_mutex_t *status_lock; + }; +} knot_sem_t; + +void knot_sem_init(knot_sem_t *sem, unsigned int value); + +void knot_sem_wait(knot_sem_t *sem); + +void knot_sem_post(knot_sem_t *sem); + +void knot_sem_destroy(knot_sem_t *sem); diff --git a/src/contrib/sockaddr.c b/src/contrib/sockaddr.c new file mode 100644 index 0000000..2b3218b --- /dev/null +++ b/src/contrib/sockaddr.c @@ -0,0 +1,368 @@ +/* Copyright (C) 2023 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <netdb.h> + +#include "libknot/errcode.h" +#include "contrib/sockaddr.h" +#include "contrib/openbsd/strlcpy.h" +#include "contrib/macros.h" +#include "contrib/musl/inet_ntop.h" + +int sockaddr_len(const struct sockaddr_storage *ss) +{ + if (ss == NULL) { + return 0; + } + + switch(ss->ss_family) { + case AF_INET: + return sizeof(struct sockaddr_in); + case AF_INET6: + return sizeof(struct sockaddr_in6); + case AF_UNIX: + return sizeof(struct sockaddr_un); + default: + return 0; + } +} + +static int cmp_ipv4(const struct sockaddr_in *a, const struct sockaddr_in *b, + bool ignore_port) +{ + if (a->sin_addr.s_addr < b->sin_addr.s_addr) { + return -1; + } else if (a->sin_addr.s_addr > b->sin_addr.s_addr) { + return 1; + } else { + return ignore_port ? 0 : a->sin_port - b->sin_port; + } +} + +static int cmp_ipv6(const struct sockaddr_in6 *a, const struct sockaddr_in6 *b, + bool ignore_port) +{ + int ret = memcmp(&a->sin6_addr, &b->sin6_addr, sizeof(struct in6_addr)); + if (ret == 0) { + ret = ignore_port ? 0 : a->sin6_port - b->sin6_port; + } + + return ret; +} + +static int cmp_unix(const struct sockaddr_un *a, const struct sockaddr_un *b) +{ + int len_a = strnlen(a->sun_path, sizeof(a->sun_path)); + int len_b = strnlen(b->sun_path, sizeof(b->sun_path)); + int len_min = len_a <= len_b ? len_a : len_b; + + int ret = strncmp(a->sun_path, b->sun_path, len_min); + if (ret == 0) { + ret = len_a - len_b; + } + + return ret; +} + +int sockaddr_cmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b, + bool ignore_port) +{ + assert(a); + assert(b); + if (a->ss_family != b->ss_family) { + return (int)a->ss_family - (int)b->ss_family; + } + + switch (a->ss_family) { + case AF_UNSPEC: + return 0; + case AF_INET: + return cmp_ipv4((struct sockaddr_in *)a, (struct sockaddr_in *)b, + ignore_port); + case AF_INET6: + return cmp_ipv6((struct sockaddr_in6 *)a, (struct sockaddr_in6 *)b, + ignore_port); + case AF_UNIX: + return cmp_unix((struct sockaddr_un *)a, (struct sockaddr_un *)b); + default: + return 1; + } +} + +int sockaddr_set(struct sockaddr_storage *ss, int family, const char *straddr, int port) +{ + if (ss == NULL || straddr == NULL) { + return KNOT_EINVAL; + } + + /* Set family and port. */ + memset(ss, 0, sizeof(*ss)); + ss->ss_family = family; + sockaddr_port_set(ss, port); + + /* Initialize address depending on address family. */ + if (family == AF_INET6) { + struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)ss; + if (inet_pton(family, straddr, &ipv6->sin6_addr) < 1) { + return KNOT_ERROR; + } + return KNOT_EOK; + } else if (family == AF_INET) { + struct sockaddr_in *ipv4 = (struct sockaddr_in *)ss; + if (inet_pton(family, straddr, &ipv4->sin_addr) < 1) { + return KNOT_ERROR; + } + return KNOT_EOK; + } else if (family == AF_UNIX) { + struct sockaddr_un *un = (struct sockaddr_un *)ss; + size_t ret = strlcpy(un->sun_path, straddr, sizeof(un->sun_path)); + if (ret >= sizeof(un->sun_path)) { + return KNOT_ESPACE; + } + return KNOT_EOK; + } + + return KNOT_EINVAL; +} + +void *sockaddr_raw(const struct sockaddr_storage *ss, size_t *addr_size) +{ + if (ss == NULL || addr_size == NULL) { + return NULL; + } + + if (ss->ss_family == AF_INET) { + struct sockaddr_in *ipv4 = (struct sockaddr_in *)ss; + *addr_size = sizeof(ipv4->sin_addr); + return &ipv4->sin_addr; + } else if (ss->ss_family == AF_INET6) { + struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)ss; + *addr_size = sizeof(ipv6->sin6_addr); + return &ipv6->sin6_addr; + } else if (ss->ss_family == AF_UNIX) { + struct sockaddr_un *un = (struct sockaddr_un *)ss; + *addr_size = sizeof(un->sun_path); + return un->sun_path; + } else { + return NULL; + } +} + +int sockaddr_set_raw(struct sockaddr_storage *ss, int family, + const uint8_t *raw_addr, size_t raw_addr_size) +{ + if (ss == NULL || raw_addr == NULL) { + return KNOT_EINVAL; + } + + memset(ss, 0, sizeof(*ss)); + ss->ss_family = family; + + size_t ss_size = 0; + void *ss_data = sockaddr_raw(ss, &ss_size); + if (ss_data == NULL || + (family != AF_UNIX && ss_size != raw_addr_size) || + (family == AF_UNIX && ss_size <= raw_addr_size)) { + return KNOT_EINVAL; + } + + memcpy(ss_data, raw_addr, raw_addr_size); + + return KNOT_EOK; +} + +int sockaddr_tostr(char *buf, size_t maxlen, const struct sockaddr_storage *ss) +{ + if (ss == NULL || buf == NULL) { + return KNOT_EINVAL; + } + + const char *out = NULL; + + /* Convert network address string. */ + if (ss->ss_family == AF_INET6) { + const struct sockaddr_in6 *s = (const struct sockaddr_in6 *)ss; + out = knot_inet_ntop(ss->ss_family, &s->sin6_addr, buf, maxlen); + } else if (ss->ss_family == AF_INET) { + const struct sockaddr_in *s = (const struct sockaddr_in *)ss; + out = knot_inet_ntop(ss->ss_family, &s->sin_addr, buf, maxlen); + } else if (ss->ss_family == AF_UNIX) { + const struct sockaddr_un *s = (const struct sockaddr_un *)ss; + const char *path = (s->sun_path[0] != '\0' ? s->sun_path : "UNIX socket"); + size_t ret = strlcpy(buf, path, maxlen); + out = (ret < maxlen) ? buf : NULL; + } else { + *buf = '\0'; + return KNOT_EINVAL; + } + + if (out == NULL) { + *buf = '\0'; + return KNOT_ESPACE; + } + + /* Write separator and port. */ + int written = strlen(buf); + int port = sockaddr_port(ss); + if (port > 0) { + int ret = snprintf(&buf[written], maxlen - written, "@%d", port); + if (ret <= 0 || (size_t)ret >= maxlen - written) { + *buf = '\0'; + return KNOT_ESPACE; + } + + written += ret; + } + + return written; +} + +int sockaddr_port(const struct sockaddr_storage *ss) +{ + if (ss == NULL) { + return KNOT_EINVAL; + } + + if (ss->ss_family == AF_INET6) { + return ntohs(((struct sockaddr_in6 *)ss)->sin6_port); + } else if (ss->ss_family == AF_INET) { + return ntohs(((struct sockaddr_in *)ss)->sin_port); + } else { + return KNOT_EINVAL; + } +} + +void sockaddr_port_set(struct sockaddr_storage *ss, uint16_t port) +{ + if (ss == NULL) { + return; + } + + if (ss->ss_family == AF_INET6) { + ((struct sockaddr_in6 *)ss)->sin6_port = htons(port); + } else if (ss->ss_family == AF_INET) { + ((struct sockaddr_in *)ss)->sin_port = htons(port); + } +} + +char *sockaddr_hostname(void) +{ + /* Fetch hostname. */ + char host[256] = ""; + if (gethostname(host, sizeof(host)) != 0) { + return NULL; + } + /* Just to be sure. */ + host[sizeof(host) - 1] = '\0'; + + /* Fetch canonical name for this address/DNS. */ + struct addrinfo hints, *info = NULL; + memset(&hints, 0, sizeof hints); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_DGRAM; + hints.ai_flags = AI_CANONNAME; + if (getaddrinfo(host, "domain", &hints, &info) != 0) { + return NULL; + } + + /* Fetch first valid hostname. */ + char *hname = NULL; + struct addrinfo *p = NULL; + for (p = info; p != NULL; p = p->ai_next) { + if (p->ai_canonname) { + hname = strdup(p->ai_canonname); + break; + } + } + + /* No valid hostname found, resort to gethostname() result */ + if (hname == NULL) { + hname = strdup(host); + } + + freeaddrinfo(info); + return hname; +} + +bool sockaddr_is_any(const struct sockaddr_storage *ss) +{ + if (ss == NULL) { + return false; + } + + if (ss->ss_family == AF_INET) { + const struct sockaddr_in *ipv4 = (struct sockaddr_in *)ss; + return ipv4->sin_addr.s_addr == INADDR_ANY; + } + + if (ss->ss_family == AF_INET6) { + const struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)ss; + return memcmp(&ipv6->sin6_addr, &in6addr_any, sizeof(ipv6->sin6_addr)) == 0; + } + + return false; +} + +bool sockaddr_net_match(const struct sockaddr_storage *ss1, + const struct sockaddr_storage *ss2, + unsigned prefix) +{ + if (ss1 == NULL || ss2 == NULL) { + return false; + } + + if (ss1->ss_family != ss2->ss_family) { + return false; + } + + size_t raw_len = 0; + const uint8_t *raw_1 = sockaddr_raw(ss1, &raw_len); + const uint8_t *raw_2 = sockaddr_raw(ss2, &raw_len); + + prefix = MIN(prefix, raw_len * 8); + unsigned bytes = prefix / 8; + unsigned bits = prefix % 8; + + /* Compare full bytes. */ + if (memcmp(raw_1, raw_2, bytes) != 0) { + return false; + } + + /* Compare last partial byte. */ + return bits == 0 || + (raw_1[bytes] >> (8 - bits) == raw_2[bytes] >> (8 - bits)); +} + +bool sockaddr_range_match(const struct sockaddr_storage *ss, + const struct sockaddr_storage *ss_min, + const struct sockaddr_storage *ss_max) +{ + if (ss == NULL || ss_min == NULL || ss_max == NULL) { + return false; + } + + if (ss_min->ss_family != ss_max->ss_family || + ss_min->ss_family != ss->ss_family) { + return false; + } + + return sockaddr_cmp(ss, ss_min, true) >= 0 && + sockaddr_cmp(ss, ss_max, true) <= 0; +} diff --git a/src/contrib/sockaddr.h b/src/contrib/sockaddr.h new file mode 100644 index 0000000..f9228d7 --- /dev/null +++ b/src/contrib/sockaddr.h @@ -0,0 +1,160 @@ +/* Copyright (C) 2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#pragma once + +#include <stdbool.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <stdint.h> +#include <unistd.h> + +/* Subnet maximum prefix length. */ +#define IPV4_PREFIXLEN 32 +#define IPV6_PREFIXLEN 128 + +/* Address string "address[@port]" maximum length. */ +#define SOCKADDR_STRLEN_EXT (1 + 6) /* '@', 5 digits number, \0 */ +#define SOCKADDR_STRLEN (sizeof(struct sockaddr_un) + SOCKADDR_STRLEN_EXT) + +/*! + * \brief Calculate current structure length based on address family. + * + * \param ss Socket address. + * + * \return Number of bytes or error code. + */ +int sockaddr_len(const struct sockaddr_storage *ss); + +/*! + * \brief Compare addresses. + * + * \param a First address. + * \param b Second address. + * \param ignore_port Ignore port indication. + * + * \return like memcmp(3) + */ +int sockaddr_cmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b, + bool ignore_port); + +/*! + * \brief Set address and port. + * + * \param ss Socket address. + * \param family Address family. + * \param straddr IP address in string format. + * \param port Port. + * + * \return KNOT_EOK on success or an error code. + */ +int sockaddr_set(struct sockaddr_storage *ss, int family, const char *straddr, int port); + +/*! + * \brief Return raw network address in network byte order. + * + * \param[in] ss Socket address. + * \param[out] addr_size Address length. + * + * \return Pointer to binary buffer of size addr_size. + */ +void *sockaddr_raw(const struct sockaddr_storage *ss, size_t *addr_size); + +/*! + * \brief Set raw address. + * + * \param ss Socket address. + * \param family Address family. + * \param raw_addr IP address in binary format. + * \param raw_addr_size Size of the binary address. + * + * \return KNOT_EOK on success or an error code. + */ +int sockaddr_set_raw(struct sockaddr_storage *ss, int family, + const uint8_t *raw_addr, size_t raw_addr_size); + +/*! + * \brief Return string representation of socket address. + * + * \note String format: \<address>[@<port>], f.e. '127.0.0.1@53' + * + * \param buf Destination for string representation. + * \param maxlen Maximum number of written bytes. + * \param ss Socket address. + * + * \return Number of bytes written on success, error code on failure. + */ +int sockaddr_tostr(char *buf, size_t maxlen, const struct sockaddr_storage *ss); + +/*! + * \brief Return port number from address. + * + * \param ss Socket address. + * + * \return Port number or error code. + */ +int sockaddr_port(const struct sockaddr_storage *ss); + +/*! + * \brief Set port number. + * + * \param ss Socket address. + * \param port Port to set. + */ +void sockaddr_port_set(struct sockaddr_storage *ss, uint16_t port); + +/*! + * \brief Get host FQDN address. + * + * \return Hostname string or NULL. + */ +char *sockaddr_hostname(void); + +/*! + * \brief Check if address is ANY address. + * + * \param ss Socket address. + */ +bool sockaddr_is_any(const struct sockaddr_storage *ss); + +/*! + * \brief Check if two addresses match the given network prefix. + * + * \param ss1 First address. + * \param ss2 Second address. + * \param prefix Prefix length. + * + * \return True on match. + */ +bool sockaddr_net_match(const struct sockaddr_storage *ss1, + const struct sockaddr_storage *ss2, + unsigned prefix); + +/*! + * \brief Check if the address is within the given address range (inclusive). + * + * \param ss Address to check. + * \param ss_min Minimum address. + * \param ss_max Maximum address. + * + * \return True on match. + */ +bool sockaddr_range_match(const struct sockaddr_storage *ss, + const struct sockaddr_storage *ss_min, + const struct sockaddr_storage *ss_max); diff --git a/src/contrib/spinlock.h b/src/contrib/spinlock.h new file mode 100644 index 0000000..837f500 --- /dev/null +++ b/src/contrib/spinlock.h @@ -0,0 +1,133 @@ +/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/*! + * \brief Multiplatform spinlock. + */ + +#pragma once + +#if (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) +/* Not tested and activated yet. */ +/* #define HAVE_STDATOMIC */ +#endif + +#if defined(__APPLE__) +# if defined(MAC_OS_X_VERSION_10_12) || \ + MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_12 +# define APPLE_SPIN_NEW +# else +# define APPLE_SPIN_OLD +# endif /* MAC_OS_X_VERSION_10_12 ... */ +#endif /* __APPLE__ */ + +#if defined(HAVE_SYNC_ATOMIC) || defined(HAVE_ATOMIC) +# include <stdbool.h> +#elif defined(HAVE_STDATOMIC) +# include <stdbool.h> +# include <stdatomic.h> +#elif defined(APPLE_SPIN_NEW) +# include <os/lock.h> +#elif defined(APPLE_SPIN_OLD) +# include <libkern/OSAtomic.h> +#else /* POSIX pthread spinlock. */ +# include <pthread.h> +#endif + +/*! \brief Spinlock lock variable type. */ +typedef +#if defined(HAVE_SYNC_ATOMIC) || defined(HAVE_ATOMIC) + bool /*!< Spinlock lock - a simple & fast atomic version. */ +#elif defined(HAVE_STDATOMIC) + atomic_bool /*!< Spinlock lock - a simple & fast atomic version, C11 */ +#elif defined(APPLE_SPIN_NEW) + os_unfair_lock /*!< Spinlock lock - a newer macOS version (macOS >= 10.12). */ +#elif defined(APPLE_SPIN_OLD) + OSSpinLock /*!< Spinlock lock - an older macOS version (macOS < 10.12). */ +#else /* POSIX */ + pthread_spinlock_t /*!< Spinlock lock - a POSIX pthread version. */ +#endif + knot_spin_t; + +/*! \brief Initialize the spinlock pointed to by the parameter "lock". */ +static inline void knot_spin_init(knot_spin_t *lock) +{ +#if defined(HAVE_SYNC_ATOMIC) || defined(HAVE_ATOMIC) + *lock = false; +#elif defined(HAVE_STDATOMIC) + atomic_init(lock, false); +#elif defined(APPLE_SPIN_NEW) + *lock = OS_UNFAIR_LOCK_INIT; +#elif defined(APPLE_SPIN_OLD) + *lock = OS_SPINLOCK_INIT; +#else /* POSIX */ + pthread_spin_init(lock, PTHREAD_PROCESS_PRIVATE); +#endif +} + +/*! \brief Destroy the spinlock pointed to by the parameter "lock". */ +static inline void knot_spin_destroy(knot_spin_t *lock) +{ +#if defined(HAVE_SYNC_ATOMIC) || defined(HAVE_ATOMIC) || defined(HAVE_STDATOMIC) || \ + defined(APPLE_SPIN_NEW) || defined(APPLE_SPIN_OLD) + /* Nothing needed here. */ +#else /* POSIX */ + pthread_spin_destroy(lock); +#endif +} + +/*! \brief Lock the spinlock pointed to by the parameter "lock". */ +static inline void knot_spin_lock(knot_spin_t *lock) +{ +#if defined(HAVE_SYNC_ATOMIC) + while (__sync_lock_test_and_set(lock, 1)) { + } +#elif defined(HAVE_ATOMIC) + int expected = 0; + while (!__atomic_compare_exchange_n(lock, &expected, 1, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) { + expected = 0; + } +#elif defined(HAVE_STDATOMIC) + int expected = 0; + while (!atomic_compare_exchange_strong(lock, &expected, false)) { + expected = 0; + } +#elif defined(APPLE_SPIN_NEW) + os_unfair_lock_lock(lock); +#elif defined(APPLE_SPIN_OLD) + OSSpinLockLock(lock); +#else /* POSIX */ + pthread_spin_lock(lock); +#endif +} + +/*! \brief Unlock the spinlock pointed to by the parameter "lock". */ +static inline void knot_spin_unlock(knot_spin_t *lock) +{ +#if defined(HAVE_SYNC_ATOMIC) + __sync_lock_release(lock); +#elif defined(HAVE_ATOMIC) + __atomic_clear(lock, __ATOMIC_RELAXED); +#elif defined(HAVE_STDATOMIC) + atomic_store(lock, false); +#elif defined(APPLE_SPIN_NEW) + os_unfair_lock_unlock(lock); +#elif defined(APPLE_SPIN_OLD) + OSSpinLockUnlock(lock); +#else /* POSIX */ + pthread_spin_unlock(lock); +#endif +} diff --git a/src/contrib/string.c b/src/contrib/string.c new file mode 100644 index 0000000..272116e --- /dev/null +++ b/src/contrib/string.c @@ -0,0 +1,247 @@ +/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <assert.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#if defined(HAVE_EXPLICIT_BZERO) + #if defined(HAVE_BSD_STRING_H) + #include <bsd/string.h> + #endif + /* #include <string.h> is needed. */ +#elif defined(HAVE_EXPLICIT_MEMSET) + /* #include <string.h> is needed. */ +#elif defined(HAVE_GNUTLS_MEMSET) + #include <gnutls/gnutls.h> +#else + #define USE_CUSTOM_MEMSET +#endif + +#include "contrib/string.h" +#include "contrib/ctype.h" +#include "contrib/tolower.h" + +uint8_t *memdup(const uint8_t *data, size_t data_size) +{ + uint8_t *result = (uint8_t *)malloc(data_size); + if (!result) { + return NULL; + } + + return memcpy(result, data, data_size); +} + +int strmemcmp(const char *str, const uint8_t *mem, size_t mem_size) +{ + if (mem_size == 0) { + return 1; + } + size_t cmp_len = strnlen(str, mem_size - 1) + 1; + return memcmp(str, mem, cmp_len); +} + +char *sprintf_alloc(const char *fmt, ...) +{ + char *strp = NULL; + va_list ap; + + va_start(ap, fmt); + int ret = vasprintf(&strp, fmt, ap); + va_end(ap); + + if (ret < 0) { + return NULL; + } + return strp; +} + +char *strcdup(const char *s1, const char *s2) +{ + if (!s1 || !s2) { + return NULL; + } + + size_t s1len = strlen(s1); + size_t s2len = strlen(s2); + size_t nlen = s1len + s2len + 1; + + char* dst = malloc(nlen); + if (dst == NULL) { + return NULL; + } + + memcpy(dst, s1, s1len); + memcpy(dst + s1len, s2, s2len + 1); + return dst; +} + +char *strstrip(const char *str) +{ + // leading white-spaces + const char *scan = str; + while (is_space(scan[0])) { + scan += 1; + } + + // trailing white-spaces + size_t len = strlen(scan); + while (len > 0 && is_space(scan[len - 1])) { + len -= 1; + } + + char *trimmed = malloc(len + 1); + if (!trimmed) { + return NULL; + } + + memcpy(trimmed, scan, len); + trimmed[len] = '\0'; + + return trimmed; +} + +void strtolower(char *str) +{ + if (str == NULL) { + return; + } + + for (char *it = str; *it != '\0'; ++it) { + *it = knot_tolower(*it); + } +} + +int const_time_memcmp(const void *s1, const void *s2, size_t n) +{ + volatile uint8_t equal = 0; + + for (size_t i = 0; i < n; i++) { + equal |= ((uint8_t *)s1)[i] ^ ((uint8_t *)s2)[i]; + } + + return equal; +} + +#if defined(USE_CUSTOM_MEMSET) +typedef void *(*memset_t)(void *, int, size_t); +static volatile memset_t volatile_memset = memset; +#endif + +void *memzero(void *s, size_t n) +{ +#if defined(HAVE_EXPLICIT_BZERO) /* In OpenBSD since 5.5. */ + /* In FreeBSD since 11.0. */ + /* In glibc since 2.25. */ + /* In DragonFly BSD since 5.5. */ +# if defined(__has_feature) +# if __has_feature(memory_sanitizer) + #warning "Memory sanitizer detected. Using bzero() instead of explicit_bzero()." + bzero(s, n); +# else + explicit_bzero(s, n); +# endif +# else + explicit_bzero(s, n); +# endif + return s; +#elif defined(HAVE_EXPLICIT_MEMSET) /* In NetBSD since 7.0. */ + return explicit_memset(s, 0, n); +#elif defined(HAVE_GNUTLS_MEMSET) /* In GnuTLS since 3.4.0. */ + gnutls_memset(s, 0, n); + return s; +#else /* Knot custom solution as a fallback. */ + /* Warning: the use of the return value is *probably* needed + * so as to avoid the volatile_memset() to be optimized out. + */ + return volatile_memset(s, 0, n); +#endif +} + +static const char BIN_TO_HEX[] = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' +}; + +char *bin_to_hex(const uint8_t *bin, size_t bin_len, bool upper_case) +{ + if (bin == NULL) { + return NULL; + } + + size_t hex_size = bin_len * 2; + char *hex = malloc(hex_size + 1); + if (hex == NULL) { + return NULL; + } + + unsigned offset = upper_case ? 16 : 0; + for (size_t i = 0; i < bin_len; i++) { + hex[2 * i] = BIN_TO_HEX[offset + (bin[i] >> 4)]; + hex[2 * i + 1] = BIN_TO_HEX[offset + (bin[i] & 0x0f)]; + } + hex[hex_size] = '\0'; + + return hex; +} + +/*! + * Convert HEX character to numeric value (assumes valid input). + */ +static uint8_t hex_to_number(const char hex) +{ + if (hex >= '0' && hex <= '9') { + return hex - '0'; + } else if (hex >= 'a' && hex <= 'f') { + return hex - 'a' + 10; + } else { + assert(hex >= 'A' && hex <= 'F'); + return hex - 'A' + 10; + } +} + +uint8_t *hex_to_bin(const char *hex, size_t *out_len) +{ + if (hex == NULL || out_len == NULL) { + return NULL; + } + + size_t hex_len = strlen(hex); + if (hex_len % 2 != 0) { + return NULL; + } + + size_t bin_len = hex_len / 2; + uint8_t *bin = malloc(bin_len + 1); + if (bin == NULL) { + return NULL; + } + + for (size_t i = 0; i < bin_len; i++) { + if (!is_xdigit(hex[2 * i]) || !is_xdigit(hex[2 * i + 1])) { + free(bin); + return NULL; + } + uint8_t high = hex_to_number(hex[2 * i]); + uint8_t low = hex_to_number(hex[2 * i + 1]); + bin[i] = high << 4 | low; + } + + *out_len = bin_len; + + return bin; +} diff --git a/src/contrib/string.h b/src/contrib/string.h new file mode 100644 index 0000000..ad3c990 --- /dev/null +++ b/src/contrib/string.h @@ -0,0 +1,104 @@ +/* Copyright (C) 2022 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/*! + * \brief String manipulations. + */ + +#pragma once + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +/*! + * \brief Create a copy of a binary buffer. + * + * Like \c strdup, but for binary data. + */ +uint8_t *memdup(const uint8_t *data, size_t data_size); + +/*! + * \brief Compare a zero-terminated string with fixed-size memory. + */ +int strmemcmp(const char *str, const uint8_t *mem, size_t mem_size); + +/*! + * \brief Format string and take care of allocating memory. + * + * \note sprintf(3) manual page reference implementation. + * + * \param fmt Message format. + * \return formatted message or NULL. + */ +char *sprintf_alloc(const char *fmt, ...); + +/*! + * \brief Create new string from a concatenation of s1 and s2. + * + * \param s1 First string. + * \param s2 Second string. + * + * \retval Newly allocated string on success. + * \retval NULL on error. + */ +char *strcdup(const char *s1, const char *s2); + +/*! + * \brief Create a copy of a string skipping leading and trailing white spaces. + * + * \return Newly allocated string, NULL in case of error. + */ +char *strstrip(const char *str); + +/*! + * \brief Convert upper-case letters to lower-case in a string. + */ +void strtolower(char *str); + +/*! + * \brief Compare data in time based on string length. + * This function just checks for (in)equality not for relation + * + * \param s1 The first address to compare. + * \param s2 The second address to compare. + * \param n The size of memory to compare. + * + * \return Non zero on difference and zero if the buffers are identical. + */ +int const_time_memcmp(const void *s1, const void *s2, size_t n); + +/*! + * \brief Fill memory with zeroes. + * + * Inspired by OPENSSL_cleanse. Such a memset shouldn't be optimized out. + * + * \param s The address to fill. + * \param n The size of memory to fill. + * + * \return Pointer to the memory. + */ +void *memzero(void *s, size_t n); + +/*! + * \brief Convert binary data to hexadecimal string. + */ +char *bin_to_hex(const uint8_t *bin, size_t bin_len, bool upper_case); + +/*! + * \brief Convert hex encoded string to binary data. + */ +uint8_t *hex_to_bin(const char *hex, size_t *out_len); diff --git a/src/contrib/strtonum.h b/src/contrib/strtonum.h new file mode 100644 index 0000000..849f66f --- /dev/null +++ b/src/contrib/strtonum.h @@ -0,0 +1,125 @@ +/* Copyright (C) 2018 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#pragma once + +#include <assert.h> +#include <errno.h> +#include <inttypes.h> +#include <limits.h> +#include <stddef.h> +#include <stdint.h> + +#include "libknot/errcode.h" +#include "contrib/ctype.h" + +inline static int intmax_from_str(const char *src, intmax_t *dst, + intmax_t min, intmax_t max) +{ + if (!is_digit(*src) && *src != '-' && *src != '+') { + return KNOT_EINVAL; + } + + errno = 0; + char *end = NULL; + intmax_t result = strtoimax(src, &end, 10); + + if (errno == ERANGE) { + return KNOT_ERANGE; + } + + if (src == end || *end != '\0') { + return KNOT_EINVAL; + } + + if (result < min || result > max) { + return KNOT_ERANGE; + } + + *dst = result; + return KNOT_EOK; +} + +inline static int uintmax_from_str(const char *src, uintmax_t *dst, + uintmax_t min, uintmax_t max) +{ + if (!is_digit(*src) && *src != '+') { + return KNOT_EINVAL; + } + + errno = 0; + char *end = NULL; + uintmax_t result = strtoumax(src, &end, 10); + + if (errno == ERANGE) { + return KNOT_ERANGE; + } + + if (src == end || *end != '\0') { + return KNOT_EINVAL; + } + + if (result < min || result > max) { + return KNOT_ERANGE; + } + + *dst = result; + return KNOT_EOK; +} + +#define CONVERT(prefix, type, min, max, src, dst) \ +{ \ + assert(src && dst); \ + prefix##max_t value; \ + int result = prefix##max_from_str(src, &value, min, max); \ + if (result != KNOT_EOK) { \ + return result; \ + } \ + *dst = (type)value; \ + return KNOT_EOK; \ +} + +inline static int str_to_int(const char *src, int *dst, int min, int max) +{ + CONVERT(int, int, min, max, src, dst); +} + +inline static int str_to_u8(const char *src, uint8_t *dst) +{ + CONVERT(uint, uint8_t, 0, UINT8_MAX, src, dst); +} + +inline static int str_to_u16(const char *src, uint16_t *dst) +{ + CONVERT(uint, uint16_t, 0, UINT16_MAX, src, dst); +} + +inline static int str_to_u32(const char *src, uint32_t *dst) +{ + CONVERT(uint, uint32_t, 0, UINT32_MAX, src, dst); +} + +inline static int str_to_u64(const char *src, uint64_t *dst) +{ + CONVERT(uint, uint64_t, 0, UINT64_MAX, src, dst); +} + +inline static int str_to_size(const char *src, size_t *dst, size_t min, size_t max) +{ + CONVERT(uint, size_t, min, max, src, dst); +} + +#undef CONVERT diff --git a/src/contrib/time.c b/src/contrib/time.c new file mode 100644 index 0000000..a3a4cdf --- /dev/null +++ b/src/contrib/time.c @@ -0,0 +1,441 @@ +/* Copyright (C) 2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <assert.h> +#include <inttypes.h> +#include <limits.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "contrib/time.h" +#include "contrib/ctype.h" +#ifndef HAVE_CLOCK_GETTIME + #include <sys/time.h> +#endif + +struct timespec time_now(void) +{ + struct timespec result = { 0 }; + +#ifdef HAVE_CLOCK_GETTIME + clock_gettime(CLOCK_MONOTONIC, &result); +#else // OS X < Sierra fallback. + struct timeval tmp = { 0 }; + gettimeofday(&tmp, NULL); + result.tv_sec = tmp.tv_sec; + result.tv_nsec = 1000 * tmp.tv_usec; +#endif + + return result; +} + +struct timespec time_diff(const struct timespec *begin, const struct timespec *end) +{ + struct timespec result = { 0 }; + + if (end->tv_nsec >= begin->tv_nsec) { + result.tv_sec = end->tv_sec - begin->tv_sec; + result.tv_nsec = end->tv_nsec - begin->tv_nsec; + } else { + result.tv_sec = end->tv_sec - begin->tv_sec - 1; + result.tv_nsec = 1000000000 - begin->tv_nsec + end->tv_nsec; + } + + return result; +} + +double time_diff_ms(const struct timespec *begin, const struct timespec *end) +{ + struct timespec result = time_diff(begin, end); + + return (result.tv_sec * 1e3) + (result.tv_nsec / 1e6); +} + +typedef struct { + const char *format; + const char *timespec; + const char *parsed; + knot_timediff_t offset; + char offset_sign; + char offset_unit; + struct tm calendar; + int error; +} time_ctx_t; + +// After casting (struct tm) to (int []), we can use indexes... +static int calendar_index(char ind) +{ + switch (ind) { + case 'Y': return 5; + case 'M': return 4; + case 'D': return 3; + case 'h': return 2; + case 'm': return 1; + case 's': return 0; + default: assert(0); return 6; + } +} + +static size_t calendar_digits(int index) +{ + return index == 5 ? 4 : 2; +} + +static size_t unit_value(char unit) +{ + size_t val = 1; + switch (unit) { + case 'M': + return 3600 * 24 * 30; + case 'Y': + val *= 365; + // FALLTHROUGH + case 'D': + val *= 24; + // FALLTHROUGH + case 'h': + val *= 60; + // FALLTHROUGH + case 'm': + val *= 60; + // FALLTHROUGH + case 's': + default: + return val; + } +} + +static knot_time_t time_ctx_finalize(time_ctx_t *ctx) +{ + if (ctx->offset_sign) { + ctx->offset *= unit_value(ctx->offset_unit); + return knot_time_add(knot_time(), (ctx->offset_sign == '-' ? -1 : 1) * ctx->offset); + } else if (ctx->offset) { + return (knot_time_t)ctx->offset; + } else if (ctx->calendar.tm_year != 0) { + ctx->calendar.tm_isdst = -1; + ctx->calendar.tm_year -= 1900; + ctx->calendar.tm_mon -= 1; + // Set UTC timezone before using mktime + putenv("TZ=UTC"); + tzset(); + return (knot_time_t)mktime(&ctx->calendar); + } else { + return (knot_time_t)0; + } +} + +static void time_ctx_reset(time_ctx_t *ctx) +{ + ctx->parsed = ctx->timespec; + ctx->offset = 0; + ctx->offset_sign = 0; + memset(&ctx->calendar, 0, sizeof(ctx->calendar)); + ctx->error = 0; +} + +static void parse_quote(time_ctx_t *ctx) +{ + while (*ctx->format != '|' && *ctx->format != '\0') { + if (*ctx->format == '\'') { + ctx->format++; + return; + } + if (*ctx->format++ != *ctx->parsed++) { + ctx->error = -1; + return; + } + } + ctx->error = -2; + return; +} + +static void parse_offset(time_ctx_t *ctx) +{ + ctx->offset = 0; + ctx->error = -1; + while (is_digit(*ctx->parsed)) { + ctx->offset *= 10; + ctx->offset += *ctx->parsed++ - '0'; + ctx->error = 0; + } +} + +static void parse_calendar(time_ctx_t *ctx, int index) +{ + int *cal_arr = (int *)&ctx->calendar; + cal_arr[index] = 0; + for (size_t i = 0; i < calendar_digits(index); i++) { + if (!is_digit(*ctx->parsed)) { + ctx->error = -1; + return; + } + cal_arr[index] *= 10; + cal_arr[index] += *ctx->parsed++ - '0'; + } +} + +static void parse_sign(time_ctx_t *ctx) +{ + char sign1 = *(ctx->format - 1), sign2 = *ctx->format; + + bool use_sign2 = (sign2 == '+' || sign2 == '-'); + + bool allow_plus = (sign1 == '+' || (sign1 == '-' && sign2 == '+')); + bool allow_minus = (sign1 == '-' || (sign1 == '+' && sign2 == '-')); + assert(sign1 == '+' || sign1 == '-'); + + if ((*ctx->parsed == '+' && allow_plus) || (*ctx->parsed == '-' && allow_minus)) { + ctx->offset_sign = *ctx->parsed++; + ctx->format += (use_sign2 ? 1 : 0); + } else { + ctx->error = -11; + } +} + +static void parse_unit1(time_ctx_t *ctx) +{ + char u = *ctx->parsed++; + switch (u) { + case 'Y': + case 'M': + case 'D': + case 'h': + case 'm': + case 's': + ctx->offset_unit = u; + break; + default: + ctx->error = -1; + } +} + +static void parse_unit2(time_ctx_t *ctx) +{ + char u = *ctx->parsed++; + switch (u) { + case 'y': + case 'd': + ctx->offset_unit = toupper((unsigned char)u); + break; + case 'h': + case 's': + ctx->offset_unit = u; + break; + case 'm': + switch (*ctx->parsed++) { + case 'o': + ctx->offset_unit = 'M'; + break; + case 'i': + ctx->offset_unit = 'm'; + break; + default: + ctx->error = -1; + } + break; + default: + ctx->error = -1; + } +} + +int knot_time_parse(const char *format, const char *timespec, knot_time_t *time) +{ + if (format == NULL || timespec == NULL || time == NULL) { + return -1; + } + + time_ctx_t ctx = { + .format = format, + .timespec = timespec, + .parsed = timespec, + .offset = 0, + .offset_sign = 0, + // we hope that .calendar is zeroed by default + .error = 0, + }; + + while (ctx.error == 0 && *ctx.format != '\0') { + switch (*ctx.format++) { + case '|': + if (*ctx.parsed == '\0') { + *time = time_ctx_finalize(&ctx); + return 0; + } else { + time_ctx_reset(&ctx); + } + break; + case '\'': + parse_quote(&ctx); + break; + case '#': + parse_offset(&ctx); + break; + case 'Y': + case 'M': + case 'D': + case 'h': + case 'm': + case 's': + parse_calendar(&ctx, calendar_index(*(ctx.format - 1))); + break; + case '+': + case '-': + parse_sign(&ctx); + break; + case 'U': + parse_unit1(&ctx); + break; + case 'u': + parse_unit2(&ctx); + break; + default: + return -1; + } + + if (ctx.error < 0) { + while (*ctx.format != '|' && *ctx.format != '\0') { + ctx.format++; + } + time_ctx_reset(&ctx); + ctx.error = (*ctx.format == '\0' ? -1 : 0); + } + } + + if (ctx.error == 0 && *ctx.parsed == '\0') { + *time = time_ctx_finalize(&ctx); + return 0; + } + return -1; +} + +static char *unit_names_mixed[] = { "Y", "M", "D", "h", "m", "s" }; +static char *unit_names_lower[] = { "y", "mo", "d", "h", "mi", "s" }; +static size_t unit_sizes[] = { 3600*24*365, 3600*24*30, 3600*24, 3600, 60, 1 }; +static const size_t unit_count = 6; + +static int print_unit(char *dst, size_t dst_len, char *unit_names[unit_count], + size_t max_units, knot_time_t time) +{ + int ret; + if (time == 0) { + ret = snprintf(dst, dst_len, "0"); + return (ret < 0 || ret >= dst_len ? -1 : 0); + } + knot_timediff_t diff = knot_time_diff(time, knot_time()); + if (dst_len-- < 1) { + return -1; + } + *dst++ = (diff < 0 ? '-' : '+'); + if (diff < 0) { + diff = -diff; + } else if (diff == 0) { + ret = snprintf(dst, dst_len, "0%s", unit_names[unit_count - 1]); + return (ret < 0 || ret >= dst_len ? -1 : 0); + } + size_t curr_unit = 0, used_units = 0; + while (curr_unit < unit_count && used_units < max_units) { + if (diff >= unit_sizes[curr_unit]) { + ret = snprintf(dst, dst_len, "%"KNOT_TIMEDIFF_PRINTF"%s", + diff / unit_sizes[curr_unit], + unit_names[curr_unit]); + if (ret < 0 || ret >= dst_len) { + return -1; + } + dst += ret; + dst_len -= ret; + used_units++; + diff %= unit_sizes[curr_unit]; + } + curr_unit++; + } + return 0; +} + +int knot_time_print(knot_time_print_t format, knot_time_t time, char *dst, size_t dst_len) +{ + if (dst == NULL) { + return -1; + } + + int ret; + switch (format) { + case TIME_PRINT_UNIX: + ret = snprintf(dst, dst_len, "%"KNOT_TIME_PRINTF, time); + return ((ret >= 0 && ret < dst_len) ? 0 : -1); + case TIME_PRINT_ISO8601: + if (time > LONG_MAX) { + return -1; + } + + // Set timezone to UTC before using timezone dependent functions + putenv("TZ=UTC"); + tzset(); + + struct tm lt; + time_t tt = (time_t)time; + ret = (localtime_r(&tt, <) == NULL ? -1 : + strftime(dst, dst_len, "%Y-%m-%dT%H:%M:%SZ", <)); + return (ret > 0 ? 0 : -1); + case TIME_PRINT_RELSEC: + ret = snprintf(dst, dst_len, "%+"KNOT_TIMEDIFF_PRINTF, + knot_time_diff(time, knot_time())); + return ((ret >= 0 && ret < dst_len) ? 0 : -1); + case TIME_PRINT_HUMAN_MIXED: + return print_unit(dst, dst_len, unit_names_mixed, unit_count, time); + case TIME_PRINT_HUMAN_LOWER: + return print_unit(dst, dst_len, unit_names_lower, unit_count, time); + default: + return -1; + } +} + +int knot_time_print_human(knot_time_t time, char *dst, size_t dst_len, bool condensed) +{ + int ret; + knot_time_t num; + bool empty = true; + size_t total_len = 0; + +#define tths_process(unit, unit_name, unit_size) \ + num = time / (unit_size); \ + if (num > 0) { \ + ret = snprintf(dst + total_len, dst_len - total_len, \ + "%s%"PRIu64"%s%s", \ + (!empty && !condensed ? " " : ""), \ + num, \ + (condensed ? unit : unit_name), \ + (num > 1 && !condensed ? "s" : "")); \ + if (ret <= 0 || (size_t)ret >= dst_len - total_len) { \ + return -1; \ + } \ + empty = false; \ + total_len += ret; \ + time -= num * (unit_size); \ + } + + tths_process("w", " week", 604800); + tths_process("d", " day", 86400); + tths_process("h", " hour", 3600); + tths_process("m", " minute", 60); + tths_process("s", " second", 1); + +#undef tths_process + + return total_len > 0 ? total_len : -1; +} diff --git a/src/contrib/time.h b/src/contrib/time.h new file mode 100644 index 0000000..20d241e --- /dev/null +++ b/src/contrib/time.h @@ -0,0 +1,211 @@ +/* Copyright (C) 2023 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#pragma once + +#include <inttypes.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <time.h> + +#ifdef __APPLE__ + #define st_mtim st_mtimespec +#endif + +/*! + * \brief Specify output format for knot_time_print(). + */ +typedef enum { + TIME_PRINT_UNIX, // numeric UNIX time + TIME_PRINT_ISO8601, // 2016-12-31T23:59:00 + TIME_PRINT_RELSEC, // relative +6523 + TIME_PRINT_HUMAN_MIXED, // relative with mixed-case units + TIME_PRINT_HUMAN_LOWER, // relative with lower-case units +} knot_time_print_t; + +/*! + * \brief Get current time. + */ +struct timespec time_now(void); + +/*! + * \brief Get time elapsed between two events. + */ +struct timespec time_diff(const struct timespec *begin, const struct timespec *end); + +/*! + * \brief Get time elapsed between two events in milliseconds. + */ +double time_diff_ms(const struct timespec *begin, const struct timespec *end); + +/*! + * \brief Data type for keeping UNIX timestamps. + * + * This is because time_t can be 32-bit on some systems, which is bad. + * Zero value represents infinity. + */ +typedef uint64_t knot_time_t; + +/*! + * \brief Data type for keeping time differences. + */ +typedef int64_t knot_timediff_t; + +#define KNOT_TIMEDIFF_MIN INT64_MIN +#define KNOT_TIMEDIFF_MAX INT64_MAX + +#define KNOT_TIME_PRINTF PRIu64 +#define KNOT_TIMEDIFF_PRINTF PRId64 + +/*! + * \brief Returns current time sice epoch. + */ +inline static knot_time_t knot_time(void) +{ + return (knot_time_t)time(NULL); +} + +/*! + * \brief Compare two timestamps. + * + * \return 0 if equal, -1 if the former is smaller (=earlier), 1 else. + */ +inline static int knot_time_cmp(knot_time_t a, knot_time_t b) +{ + return (a == b ? 0 : 1) * ((a && b) == 0 ? -1 : 1) * (a < b ? -1 : 1); +} + +/*! + * \brief Return the smaller (=earlier) from given two timestamps. + */ +inline static knot_time_t knot_time_min(knot_time_t a, knot_time_t b) +{ + if ((a && b) == 0) { + return a + b; + } else { + return (a < b ? a : b); + } +} + +/*! + * \brief Return the difference between two timestamps (to "minus" from). + * + * \note If both are zero (=infinity), KNOT_TIMEDIFF_MAX is returned. + */ +inline static knot_timediff_t knot_time_diff(knot_time_t to, knot_time_t from) +{ + if ((to && from) == 0) { + return (to > from ? KNOT_TIMEDIFF_MIN : KNOT_TIMEDIFF_MAX); + } else { + return (knot_timediff_t)to - (knot_timediff_t)from; + } +} + +/*! + * \brief Add a time difference to timestamp. + */ +inline static knot_time_t knot_time_add(knot_time_t since, knot_timediff_t howlong) +{ + return (since != 0 ? since + howlong : since); +} + +inline static knot_time_t knot_time_plus(knot_time_t a, knot_time_t b) +{ + return ((a && b) ? a + b : 0); +} + +/*! + * \brief Convert uint32_t-encoded timestamp to knot_time_t. + * + * In RRSIG rdata, there are inception and expiration timestamps in uint32_t format. + * One shall use 'serial arithmetics' to decode them. + * + * The result of this function is a timestamp that equals to + * given 32-bit time in lower 32 bits, and does not differ from + * now by more than 2^31. + */ +inline static knot_time_t knot_time_from_u32(uint32_t u32time, knot_time_t now) +{ + if (now == 0) { + now = knot_time(); + } + + uint32_t now_lower32 = (uint32_t)now; + uint64_t now_upper32 = now >> 32; + if (now_lower32 > u32time && now_lower32 - u32time > INT32_MAX) { + now_upper32++; + } else if (now_lower32 < u32time && u32time - now_lower32 > INT32_MAX) { + now_upper32--; + } + + return (now_upper32 << 32) + u32time; +} + +/*! + * \brief Parse a text-formatted timestamp to knot_time_t using format specification. + * + * \param format The timestamp text format specification. + * \param timespec Text-formatted timestamp. + * \param time The parsed timestamp. + * + * The format specification basics: + * <format 1>|<format 2> - The pipe sign separates two time format specifications. Leftmost + * specification matching the timespec is used. + * '<a string>' - Matches exactly <a string> (not containing apostrophes) in timespec. + * # - Hashtag matches for a number in timespec, stands for either a UNIX timestamp, + * or, within a context of an unit, as a number of such units. + * Y, M, D, h, m, s - Matches a number, stands for a number of years, months, days, hours, + * minutes and seconds, respectively. + * +, - - The + and - signs declaring that following timespec is relative to "now". + * A single sign can be used to limit the timestamp being in future or in past, + * or both +- allow the timestamp to select any (just one) of them. + * U - Matches one of Y, M, D, h, m, s in the timespec standing for a time unit. + * u - Like U, but the unit in the timestamp is from: y, mo, d, h, mi, s. + * + * \retval -1 An error occurred, out_time has no sense. + * \return 0 OK, timestamp parsed successfully. + */ +int knot_time_parse(const char *format, const char *timespec, knot_time_t *time); + +/*! + * \brief Print the timestamp in specified format into a string buffer. + * + * \param format The timestamp text format specification. + * \param time The timestamp to be printed. + * \param dst The destination buffer pointer with text-formatted timestamp. + * \param dst_len The destination buffer length. + * + * \retval -1 An error occurred, the buffer may be filled with nonsense. + * \return 0 OK, timestamp printed successfully. + */ +int knot_time_print(knot_time_print_t format, knot_time_t time, char *dst, size_t dst_len); + +/*! + * \brief Print the timestamp in a predefined human format. + * + * Condensed format (zone file compatible): 1w2d3h4m5s + * Normal format: 1 week 2 days 3 hours 4 minutes 5 seconds + * + * \param time The timestamp to be printed. + * \param dst The destination buffer pointer with text-formatted timestamp. + * \param dst_len The destination buffer length. + * \param condensed Condensed format indication. + * + * \retval -1 An error occurred, the buffer may be filled with nonsense. + * \return >0 OK, timestamp printed successfully. + */ +int knot_time_print_human(knot_time_t time, char *dst, size_t dst_len, bool condensed); diff --git a/src/contrib/toeplitz.h b/src/contrib/toeplitz.h new file mode 100644 index 0000000..02974d2 --- /dev/null +++ b/src/contrib/toeplitz.h @@ -0,0 +1,122 @@ +/* Copyright (C) 2022 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/*! + * \brief Microsoft Toeplitz-based hash implementation + */ + +#pragma once + +#include <assert.h> +#include <stdint.h> + +#include "libknot/endian.h" + +/*! + * \brief Computes a Toeplitz hash value for given key and data. + * + * \param key Key vector + * \param key_len Length of the key vector in bytes. + * \param data Input data to compute hash from. + * \param data_len Lenght of the input data in bytes. + * + * \return A Toeplitz hash value. + */ +inline static uint32_t toeplitz_hash(const uint8_t *key, const size_t key_len, + const uint8_t *data, const size_t data_len) +{ + assert(key_len >= 4 + 2 * (16 + 2)); + + uint32_t key32 = be32toh(*(const uint32_t *)key); + key += sizeof(uint32_t); + + int ret = 0; + + for (int i = 0; i < data_len; i++) { + for (int bit = 7; bit >= 0; bit--) { + if (data[i] & (1 << bit)) { + ret ^= key32; + } + + key32 <<= 1; + key32 |= !!(key[0] & (1 << bit)); + } + key++; + } + + return ret; +} + +/*! + * \brief Toeplitz hash context for divided processing. + */ +typedef struct { + const uint8_t *data; + const uint8_t *data_end; + const uint8_t *key; + uint32_t hash; + uint32_t key32; +} toeplitz_ctx_t; + +inline static void toeplitz_init(toeplitz_ctx_t *ctx, uint8_t count, + const uint8_t *key, const uint8_t key_len, + const uint8_t *data, const uint8_t data_len) +{ + assert(key_len >= 40); + + ctx->data = data; + ctx->data_end = data + data_len; + ctx->key = key + sizeof(uint32_t); + ctx->hash = 0; + ctx->key32 = be32toh(*(const uint32_t *)key); + + const uint8_t *stop = ctx->data + count; + assert(stop <= ctx->data_end); + + while (ctx->data < stop) { + for (int bit = 7; bit >= 0; bit--) { + if (*ctx->data & (1 << bit)) { + ctx->hash ^= ctx->key32; + } + + ctx->key32 <<= 1; + ctx->key32 |= !!(*ctx->key & (1 << bit)); + } + ctx->data++; + ctx->key++; + } +} + +inline static uint32_t toeplitz_finish(toeplitz_ctx_t *ctx) +{ + uint32_t hash = ctx->hash; + uint32_t key32 = ctx->key32; + const uint8_t *key = ctx->key; + + for (const uint8_t *in = ctx->data; in < ctx->data_end; in++) { + for (int bit = 7; bit >= 0; bit--) { + if (*in & (1 << bit)) { + hash ^= key32; + } + + key32 <<= 1; + key32 |= !!(*key & (1 << bit)); + } + key++; + } + + return hash; +} diff --git a/src/contrib/tolower.h b/src/contrib/tolower.h new file mode 100644 index 0000000..c8b4897 --- /dev/null +++ b/src/contrib/tolower.h @@ -0,0 +1,52 @@ +/* Copyright (C) 2018 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/*! + * \brief Table for converting ASCII characters to lowercase. + */ + +#pragma once + +#include <stdint.h> + +/*! + * \brief Converts binary character to lowercase. + * + * \param c Character code. + * + * \return \a c converted to lowercase (or \a c if not applicable). + */ +static inline uint8_t knot_tolower(uint8_t c) { + const uint8_t *tolower_table = (uint8_t *) + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F" + "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F" + "\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F" + "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F" + "\x40\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F" + "\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x5B\x5C\x5D\x5E\x5F" + "\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F" + "\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F" + "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F" + "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F" + "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF" + "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF" + "\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF" + "\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF" + "\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF" + "\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF"; + + return tolower_table[c]; +} diff --git a/src/contrib/trim.h b/src/contrib/trim.h new file mode 100644 index 0000000..2b89249 --- /dev/null +++ b/src/contrib/trim.h @@ -0,0 +1,36 @@ +/* Copyright (C) 2018 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/*! + * \brief Heap memory trimmer. + */ + +#pragma once + +#ifdef HAVE_MALLOC_TRIM +#include <malloc.h> +#endif + +/*! + * \brief Trim excess heap memory. + */ +static inline void mem_trim(void) +{ +#ifdef HAVE_MALLOC_TRIM + malloc_trim(0); +#endif + return; +} diff --git a/src/contrib/ucw/LICENSE b/src/contrib/ucw/LICENSE new file mode 100644 index 0000000..b463d57 --- /dev/null +++ b/src/contrib/ucw/LICENSE @@ -0,0 +1 @@ +../licenses/LGPL-2.0
\ No newline at end of file diff --git a/src/contrib/ucw/array-sort.h b/src/contrib/ucw/array-sort.h new file mode 100644 index 0000000..1ff1377 --- /dev/null +++ b/src/contrib/ucw/array-sort.h @@ -0,0 +1,195 @@ +/* + * UCW Library -- Universal Simple Array Sorter + * + * (c) 2003--2008 Martin Mares <mj@ucw.cz> + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#pragma once + +#include "contrib/macros.h" + +/* + * This is not a normal header file, it's a generator of sorting + * routines. Each time you include it with parameters set in the + * corresponding preprocessor macros, it generates an array sorter + * with the parameters given. + * + * You might wonder why the heck do we implement our own array sorter + * instead of using qsort(). The primary reason is that qsort handles + * only continuous arrays, but we need to sort array-like data structures + * where the only way to access elements is by using an indexing macro. + * Besides that, we are more than 2 times faster. + * + * So much for advocacy, there are the parameters (those marked with [*] + * are mandatory): + * + * ASORT_PREFIX(x) [*] add a name prefix (used on all global names + * defined by the sorter) + * ASORT_KEY_TYPE [*] data type of a single array entry key + * ASORT_ELT(i) returns the key of i-th element; if this macro is not + * defined, the function gets a pointer to an array to be sorted + * ASORT_LT(x,y) x < y for ASORT_KEY_TYPE (default: "x<y") + * ASORT_SWAP(i,j) swap i-th and j-th element (default: assume _ELT + * is an l-value and swap just the keys) + * ASORT_THRESHOLD threshold for switching between quicksort and insertsort + * ASORT_EXTRA_ARGS extra arguments for the sort function (they are always + * visible in all the macros supplied above), starts with comma + * + * After including this file, a function ASORT_PREFIX(sort)(unsigned array_size) + * or ASORT_PREFIX(sort)(ASORT_KEY_TYPE *array, unsigned array_size) [if ASORT_ELT + * is not defined] is declared and all parameter macros are automatically + * undef'd. + */ + +#ifndef ASORT_LT +#define ASORT_LT(x,y) ((x) < (y)) +#endif + +#ifndef ASORT_SWAP +#define ASORT_SWAP(i,j) do { ASORT_KEY_TYPE tmp = ASORT_ELT(i); ASORT_ELT(i)=ASORT_ELT(j); ASORT_ELT(j)=tmp; } while (0) +#endif + +#ifndef ASORT_THRESHOLD +#define ASORT_THRESHOLD 8 /* Guesswork and experimentation */ +#endif + +#ifndef ASORT_EXTRA_ARGS +#define ASORT_EXTRA_ARGS +#endif + +#ifndef ASORT_ELT +#define ASORT_ARRAY_ARG ASORT_KEY_TYPE *array, +#define ASORT_ELT(i) array[i] +#else +#define ASORT_ARRAY_ARG +#endif + +/** + * The generated sorting function. If `ASORT_ELT` macro is not provided, the + * @ASORT_ARRAY_ARG is equal to `ASORT_KEY_TYPE *array` and is the array to be + * sorted. If the macro is provided, this parameter is omitted. In that case, + * you can sort global variables or pass your structure by @ASORT_EXTRA_ARGS. + **/ +static void ASORT_PREFIX(sort)(ASORT_ARRAY_ARG unsigned array_size ASORT_EXTRA_ARGS) +{ + struct stk { int l, r; } stack[8*sizeof(unsigned)]; + int l, r, left, right, m; + unsigned sp = 0; + ASORT_KEY_TYPE pivot; + + if (array_size <= 1) + return; + + /* QuickSort with optimizations a'la Sedgewick, but stop at ASORT_THRESHOLD */ + + left = 0; + right = array_size - 1; + for(;;) + { + l = left; + r = right; + m = (l+r)/2; + if (ASORT_LT(ASORT_ELT(m), ASORT_ELT(l))) + ASORT_SWAP(l,m); + if (ASORT_LT(ASORT_ELT(r), ASORT_ELT(m))) + { + ASORT_SWAP(m,r); + if (ASORT_LT(ASORT_ELT(m), ASORT_ELT(l))) + ASORT_SWAP(l,m); + } + pivot = ASORT_ELT(m); + do + { + while (ASORT_LT(ASORT_ELT(l), pivot)) + l++; + while (ASORT_LT(pivot, ASORT_ELT(r))) + r--; + if (l < r) + { + ASORT_SWAP(l,r); + l++; + r--; + } + else if (l == r) + { + l++; + r--; + } + } + while (l <= r); + if ((r - left) >= ASORT_THRESHOLD && (right - l) >= ASORT_THRESHOLD) + { + /* Both partitions ok => push the larger one */ + if ((r - left) > (right - l)) + { + stack[sp].l = left; + stack[sp].r = r; + left = l; + } + else + { + stack[sp].l = l; + stack[sp].r = right; + right = r; + } + sp++; + } + else if ((r - left) >= ASORT_THRESHOLD) + { + /* Left partition OK, right undersize */ + right = r; + } + else if ((right - l) >= ASORT_THRESHOLD) + { + /* Right partition OK, left undersize */ + left = l; + } + else + { + /* Both partitions undersize => pop */ + if (!sp) + break; + sp--; + left = stack[sp].l; + right = stack[sp].r; + } + } + + /* + * We have a partially sorted array, finish by insertsort. Inspired + * by qsort() in GNU libc. + */ + + /* Find minimal element which will serve as a barrier */ + r = MIN(array_size, ASORT_THRESHOLD); + m = 0; + for (l=1; l<r; l++) + if (ASORT_LT(ASORT_ELT(l),ASORT_ELT(m))) + m = l; + ASORT_SWAP(0,m); + + /* Insertion sort */ + for (m=1; m<(int)array_size; m++) + { + l=m; + while (ASORT_LT(ASORT_ELT(m),ASORT_ELT(l-1))) + l--; + while (l < m) + { + ASORT_SWAP(l,m); + l++; + } + } +} + +#undef ASORT_PREFIX +#undef ASORT_KEY_TYPE +#undef ASORT_ELT +#undef ASORT_LT +#undef ASORT_SWAP +#undef ASORT_THRESHOLD +#undef ASORT_EXTRA_ARGS +#undef ASORT_ARRAY_ARG diff --git a/src/contrib/ucw/binsearch.h b/src/contrib/ucw/binsearch.h new file mode 100644 index 0000000..b791d39 --- /dev/null +++ b/src/contrib/ucw/binsearch.h @@ -0,0 +1,50 @@ +/* + * UCW Library -- Generic Binary Search + * + * (c) 2005 Martin Mares <mj@ucw.cz> + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#pragma once + +/*** + * [[defs]] + * Definitions + * ----------- + ***/ + +/** + * Find the first element not lower than \p x in the sorted array \p ary of \p N elements (non-decreasing order). + * Returns the index of the found element or \p N if no exists. Uses `ary_lt_x(ary,i,x)` to compare the i'th element with \p x. + * The time complexity is `O(log(N))`. + **/ +#define BIN_SEARCH_FIRST_GE_CMP(ary, N, ary_lt_x, x, ...) ({ \ + unsigned l = 0, r = (N); \ + while (l < r) \ + { \ + unsigned m = (l+r)/2; \ + if (ary_lt_x(ary, m, x, __VA_ARGS__)) \ + l = m+1; \ + else \ + r = m; \ + } \ + l; \ +}) + +/** + * The default comparison macro for \ref BIN_SEARCH_FIRST_GE_CMP(). + **/ +#define ARY_LT_NUM(ary,i,x) (ary)[i] < (x) + +/** + * Same as \ref BIN_SEARCH_FIRST_GE_CMP(), but uses the default `<` operator for comparisons. + **/ +#define BIN_SEARCH_FIRST_GE(ary,N,x) BIN_SEARCH_FIRST_GE_CMP(ary,N,ARY_LT_NUM,x) + +/** + * Search the sorted array \p ary of \p N elements (non-decreasing) for the first occurrence of \p x. + * Returns the index or -1 if no such element exists. Uses the `<` operator for comparisons. + **/ +#define BIN_SEARCH_EQ(ary,N,x) ({ int i = BIN_SEARCH_FIRST_GE(ary,N,x); if (i >= (N) || (ary)[i] != (x)) i=-1; i; }) diff --git a/src/contrib/ucw/heap.c b/src/contrib/ucw/heap.c new file mode 100644 index 0000000..7d74aeb --- /dev/null +++ b/src/contrib/ucw/heap.c @@ -0,0 +1,153 @@ +/* + * Binary heap + * + * (c) 2012 Ondrej Filip <feela@network.cz> + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +/*** + * Introduction + * ------------ + * + * Binary heap is a simple data structure, which for example supports efficient insertions, deletions + * and access to the minimal inserted item. We define several macros for such operations. + * Note that because of simplicity of heaps, we have decided to define direct macros instead + * of a <<generic:,macro generator>> as for several other data structures in the Libucw. + * + * A heap is represented by a number of elements and by an array of values. Beware that we + * index this array from one, not from zero as do the standard C arrays. + * + * Most macros use these parameters: + * + * - @num - a variable (signed or unsigned integer) with the number of elements + * - @heap - a C array of type @type; the heap is stored in `heap[1] .. heap[num]`; `heap[0]` is unused + * + * A valid heap must follow these rules: + * + * - `num >= 0` + * - `heap[i] >= heap[i / 2]` for each `i` in `[2, num]` + * + * The first element `heap[1]` is always lower or equal to all other elements. + ***/ + +#include <string.h> +#include <stdlib.h> +#include "contrib/ucw/heap.h" + +static inline void heap_swap(heap_val_t **e1, heap_val_t **e2) +{ + if (e1 == e2) return; /* Stack tmp should be faster than tmpelem. */ + heap_val_t *tmp = *e1; /* Even faster than 2-XOR nowadays. */ + *e1 = *e2; + *e2 = tmp; + int pos = (*e1)->pos; + (*e1)->pos = (*e2)->pos; + (*e2)->pos = pos; +} + +int heap_init(struct heap *h, int (*cmp)(void *, void *), int init_size) +{ + int isize = init_size ? init_size : INITIAL_HEAP_SIZE; + + h->num = 0; + h->max_size = isize; + h->cmp = cmp; + h->data = malloc((isize + 1) * sizeof(heap_val_t*)); /* Temp element unused. */ + + return h->data ? 1 : 0; +} + +void heap_deinit(struct heap *h) +{ + free(h->data); + memset(h, 0, sizeof(*h)); +} + +static inline void _heap_bubble_down(struct heap *h, int e) +{ + int e1; + for (;;) { + e1 = 2 * e; + if (e1 > h->num) break; + if ((h->cmp(*HELEMENT(h, e), *HELEMENT(h, e1)) < 0) && + (e1 == h->num || (h->cmp(*HELEMENT(h, e), *HELEMENT(h, e1 + 1)) < 0))) break; + if ((e1 != h->num) && (h->cmp(*HELEMENT(h, e1 + 1), *HELEMENT(h, e1)) < 0)) e1++; + heap_swap(HELEMENT(h, e), HELEMENT(h, e1)); + e = e1; + } +} + +static inline void _heap_bubble_up(struct heap *h, int e) +{ + int e1; + while (e > 1) { + e1 = e / 2; + if (h->cmp(*HELEMENT(h, e1), *HELEMENT(h, e)) < 0) break; + heap_swap(HELEMENT(h, e), HELEMENT(h, e1)); + e = e1; + } +} + +void heap_replace(struct heap *h, int pos, heap_val_t *e) +{ + *HELEMENT(h, pos) = e; + e->pos = pos; + + if (pos == 1 || h->cmp(*HELEMENT(h, pos / 2), e) < 0) { + _heap_bubble_down(h, pos); + } else { + _heap_bubble_up(h, pos); + } +} + +void heap_delmin(struct heap *h) +{ + if (h->num == 0) return; + if (h->num > 1) { + heap_swap(HHEAD(h), HELEMENT(h, h->num)); + } + (*HELEMENT(h, h->num))->pos = 0; + h->num--; + _heap_bubble_down(h, 1); +} + +int heap_insert(struct heap *h, heap_val_t *e) +{ + if (h->num == h->max_size) { + h->max_size = h->max_size * HEAP_INCREASE_STEP; + h->data = realloc(h->data, (h->max_size + 1) * sizeof(heap_val_t*)); + if (!h->data) { + return 0; + } + } + + h->num++; + *HELEMENT(h, h->num) = e; + e->pos = h->num; + _heap_bubble_up(h, h->num); + return 1; +} + +int heap_find(struct heap *h, heap_val_t *elm) +{ + return ((struct heap_val *) elm)->pos; +} + +void heap_delete(struct heap *h, int e) +{ + heap_swap(HELEMENT(h, e), HELEMENT(h, h->num)); + (*HELEMENT(h, h->num))->pos = 0; + h->num--; + if (h->cmp(*HELEMENT(h, e), *HELEMENT(h, h->num + 1)) < 0) { + _heap_bubble_up(h, e); + } else { + _heap_bubble_down(h, e); + } + + if ((h->num > INITIAL_HEAP_SIZE) && (h->num < h->max_size / HEAP_DECREASE_THRESHOLD)) { + h->max_size = h->max_size / HEAP_INCREASE_STEP; + h->data = realloc(h->data, (h->max_size + 1) * sizeof(heap_val_t*)); + } +} diff --git a/src/contrib/ucw/heap.h b/src/contrib/ucw/heap.h new file mode 100644 index 0000000..f85bb82 --- /dev/null +++ b/src/contrib/ucw/heap.h @@ -0,0 +1,46 @@ +/* Copyright (C) 2022 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#pragma once + +struct heap_val { + int pos; +}; + +typedef struct heap_val heap_val_t; + +struct heap { + int num; /* Number of elements */ + int max_size; /* Size of allocated memory */ + int (*cmp)(void *, void *); + heap_val_t **data; +}; /* Array follows */ + +#define INITIAL_HEAP_SIZE 512 /* initial heap size */ +#define HEAP_INCREASE_STEP 2 /* multiplier for each inflation, keep conservative */ +#define HEAP_DECREASE_THRESHOLD 2 /* threshold for deflation, keep conservative */ +#define HELEMENT(h,num) ((h)->data + (num)) +#define HHEAD(h) HELEMENT((h), 1) +#define EMPTY_HEAP(h) ((h)->num == 0) + +int heap_init(struct heap *, int (*cmp)(void *, void *), int); +void heap_deinit(struct heap *); + +void heap_delmin(struct heap *); +int heap_insert(struct heap *, heap_val_t *); +int heap_find(struct heap *, heap_val_t *); +void heap_delete(struct heap *, int); +void heap_replace(struct heap *, int, heap_val_t *); diff --git a/src/contrib/ucw/lists.c b/src/contrib/ucw/lists.c new file mode 100644 index 0000000..01af28f --- /dev/null +++ b/src/contrib/ucw/lists.c @@ -0,0 +1,264 @@ +/* + * BIRD Library -- Linked Lists + * + * (c) 1998 Martin Mares <mj@ucw.cz> + * (c) 2015, 2020-2022 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +/** + * DOC: Linked lists + * + * The BIRD library provides a set of functions for operating on linked + * lists. The lists are internally represented as standard doubly linked + * lists with synthetic head and tail which makes all the basic operations + * run in constant time and contain no extra end-of-list checks. Each list + * is described by a &list structure, nodes can have any format as long + * as they start with a &node structure. If you want your nodes to belong + * to multiple lists at once, you can embed multiple &node structures in them + * and use the SKIP_BACK() macro to calculate a pointer to the start of the + * structure from a &node pointer, but beware of obscurity. + * + * There also exist safe linked lists (&slist, &snode and all functions + * being prefixed with |s_|) which support asynchronous walking very + * similar to that used in the &fib structure. + */ + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + +#include "contrib/ucw/lists.h" +#include "contrib/mempattern.h" + +/** + * add_tail - append a node to a list + * \p l: linked list + * \p n: list node + * + * add_tail() takes a node \p n and appends it at the end of the list \p l. + */ +void +add_tail(list_t *l, node_t *n) +{ + node_t *z = &l->tail; + + n->next = z; + n->prev = z->prev; + z->prev->next = n; + z->prev = n; + assert(z->next == NULL); +} + +/** + * add_head - prepend a node to a list + * \p l: linked list + * \p n: list node + * + * add_head() takes a node \p n and prepends it at the start of the list \p l. + */ +void +add_head(list_t *l, node_t *n) +{ + node_t *z = &l->head; + + n->next = z->next; + n->prev = z; + z->next->prev = n; + z->next = n; + assert(z->prev == NULL); +} + +/** + * insert_node - insert a node to a list + * \p n: a new list node + * \p after: a node of a list + * + * Inserts a node \p n to a linked list after an already inserted + * node \p after. + */ +void +insert_node(node_t *n, node_t *after) +{ + node_t *z = after->next; + + n->next = z; + n->prev = after; + after->next = n; + z->prev = n; +} + +/** + * rem_node - remove a node from a list + * \p n: node to be removed + * + * Removes a node \p n from the list it's linked in. + */ +void +rem_node(node_t *n) +{ + node_t *z = n->prev; + node_t *x = n->next; + + z->next = x; + x->prev = z; + n->prev = 0; + n->next = 0; +} + +/** + * init_list - create an empty list + * \p l: list + * + * init_list() takes a &list structure and initializes its + * fields, so that it represents an empty list. + */ +void +init_list(list_t *l) +{ + l->head.next = &l->tail; + l->head.prev = NULL; + l->tail.next = NULL; + l->tail.prev = &l->head; +} + +/** + * add_tail_list - concatenate two lists + * \p to: destination list + * \p l: source list + * + * This function appends all elements of the list \p l to + * the list \p to in constant time. + */ +void +add_tail_list(list_t *to, list_t *l) +{ + node_t *p = to->tail.prev; + node_t *q = l->head.next; + + p->next = q; + q->prev = p; + to->tail.prev = l->tail.prev; +} + +/** + * list_dup - duplicate list + * \p to: destination list + * \p l: source list + * + * This function duplicates all elements of the list \p l to + * the list \p to in linear time. + * + * This function only works with a homogenous item size. + */ +void list_dup(list_t *dst, list_t *src, size_t itemsz) +{ + node_t *n; + WALK_LIST(n, *src) { + node_t *i = malloc(itemsz); + memcpy(i, n, itemsz); + add_tail(dst, i); + } +} + +/** + * list_size - gets number of nodes + * \p l: list + * + * This function counts nodes in list \p l and returns this number. + */ +size_t list_size(const list_t *l) +{ + size_t count = 0; + + node_t *n; + WALK_LIST(n, *l) { + count++; + } + + return count; +} + +/** + * fix_list - correction of head/tail pointers when list had been memmove'd + * \p l: list + * + * WARNING: must not be called on empty list + */ +void fix_list(list_t *l) +{ + node_t *n = HEAD(*l); + assert(n->next != NULL); + n->prev = &l->head; + + n = TAIL(*l); + assert(n->prev != NULL); + n->next = &l->tail; +} + +/** + * ptrlist_add - add pointer to pointer list + * \p to: destination list + * \p val: added pointer + * \p mm: memory context + */ +ptrnode_t *ptrlist_add(list_t *to, void *val, knot_mm_t *mm) +{ + ptrnode_t *node = mm_alloc(mm , sizeof(ptrnode_t)); + if (node == NULL) { + return NULL; + } else { + node->d = val; + } + add_tail(to, &node->n); + return node; +} + +/** + * ptrlist_free - free all nodes in pointer list + * \p list: list nodes + * \p mm: memory context + */ +void ptrlist_free(list_t *list, knot_mm_t *mm) +{ + node_t *n, *nxt; + WALK_LIST_DELSAFE(n, nxt, *list) { + mm_free(mm, n); + } + init_list(list); +} + +/** + * ptrlist_rem - remove pointer node + * \p val: pointer to remove + * \p mm: memory context + */ +void ptrlist_rem(ptrnode_t *node, knot_mm_t *mm) +{ + rem_node(&node->n); + mm_free(mm, node); +} + +/** + * ptrlist_deep_free - free all nodes incl referenced data + * \p list: list nodes + * \p mm: memory context + */ +void ptrlist_deep_free(list_t *l, knot_mm_t *mm) +{ + ptrnode_t *n; + WALK_LIST(n, *l) { + mm_free(mm, n->d); + } + ptrlist_free(l, mm); +} + +void ptrlist_free_custom(list_t *l, knot_mm_t *mm, ptrlist_free_cb free_cb) +{ + ptrnode_t *n; + WALK_LIST(n, *l) { + free_cb(n->d); + } + ptrlist_free(l, mm); +} diff --git a/src/contrib/ucw/lists.h b/src/contrib/ucw/lists.h new file mode 100644 index 0000000..1a3ca95 --- /dev/null +++ b/src/contrib/ucw/lists.h @@ -0,0 +1,74 @@ +/* + * BIRD Library -- Linked Lists + * + * (c) 1998 Martin Mares <mj@ucw.cz> + * (c) 2015, 2020-2022 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#pragma once + +#include <string.h> +#include "libknot/mm_ctx.h" + +typedef struct node { + struct node *next, *prev; +} node_t; + +typedef struct list { + node_t head, tail; +} list_t; + +#define NODE (node_t *) +#define HEAD(list) ((void *)((list).head.next)) +#define TAIL(list) ((void *)((list).tail.prev)) +#define WALK_LIST(n,list) for(n=HEAD(list);(NODE (n))->next; \ + n=(void *)((NODE (n))->next)) +#define WALK_LIST_DELSAFE(n,nxt,list) \ + for(n=HEAD(list); (nxt=(void *)((NODE (n))->next)); n=(void *) nxt) +/* WALK_LIST_FIRST supposes that called code removes each processed node */ +#define WALK_LIST_FIRST(n,list) \ + while(n=HEAD(list), (NODE (n))->next) +#define WALK_LIST_BACKWARDS(n,list) for(n=TAIL(list);(NODE (n))->prev; \ + n=(void *)((NODE (n))->prev)) +#define WALK_LIST_BACKWARDS_DELSAFE(n,prv,list) \ + for(n=TAIL(list); prv=(void *)((NODE (n))->prev); n=(void *) prv) + +#define EMPTY_LIST(list) (!(NODE HEAD(list))->next) + +/*! \brief Free every node in the list. */ +#define WALK_LIST_FREE(list) \ + do { \ + node_t *n=0,*nxt=0; \ + WALK_LIST_DELSAFE(n,nxt,list) { \ + free(n); \ + } \ + init_list(&list); \ + } while(0) + +void add_tail(list_t *, node_t *); +void add_head(list_t *, node_t *); +void rem_node(node_t *); +void add_tail_list(list_t *, list_t *); +void init_list(list_t *); +void insert_node(node_t *, node_t *); +void list_dup(list_t *dst, list_t *src, size_t itemsz); +size_t list_size(const list_t *); +void fix_list(list_t *); + +/*! + * \brief Generic pointer list implementation. + */ +typedef struct ptrnode { + node_t n; + void *d; +} ptrnode_t; + +ptrnode_t *ptrlist_add(list_t *, void *, knot_mm_t *); +void ptrlist_free(list_t *, knot_mm_t *); +void ptrlist_rem(ptrnode_t *node, knot_mm_t *mm); +void ptrlist_deep_free(list_t *, knot_mm_t *); + +typedef void (*ptrlist_free_cb)(void *); +void ptrlist_free_custom(list_t *l, knot_mm_t *mm, ptrlist_free_cb free_cb); diff --git a/src/contrib/ucw/mempool.c b/src/contrib/ucw/mempool.c new file mode 100644 index 0000000..8e835c1 --- /dev/null +++ b/src/contrib/ucw/mempool.c @@ -0,0 +1,323 @@ +/* + * UCW Library -- Memory Pools (One-Time Allocation) + * + * (c) 1997--2001 Martin Mares <mj@ucw.cz> + * (c) 2007 Pavel Charvat <pchar@ucw.cz> + * (c) 2015, 2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#undef LOCAL_DEBUG + +#include <string.h> +#include <strings.h> +#include <stdlib.h> +#include <stdio.h> +#include <assert.h> +#include "contrib/asan.h" +#include "contrib/macros.h" +#include "contrib/ucw/mempool.h" + +/** \todo This shouldn't be precalculated, but computed on load. */ +#define CPU_PAGE_SIZE 4096 + +/** Align an integer \p s to the nearest higher multiple of \p a (which should be a power of two) **/ +#define ALIGN_TO(s, a) (((s)+a-1)&~(a-1)) +#define MP_CHUNK_TAIL ALIGN_TO(sizeof(struct mempool_chunk), CPU_STRUCT_ALIGN) +#define MP_SIZE_MAX (~0U - MP_CHUNK_TAIL - CPU_PAGE_SIZE) +#define DBG(s, ...) + +/** \note Imported MMAP backend from bigalloc.c */ +#define CONFIG_UCW_POOL_IS_MMAP +#ifdef CONFIG_UCW_POOL_IS_MMAP +#include <sys/mman.h> +static void * +page_alloc(uint64_t len) +{ + if (!len) { + return NULL; + } + if (len > SIZE_MAX) { + return NULL; + } + assert(!(len & (CPU_PAGE_SIZE-1))); + uint8_t *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (p == (uint8_t*) MAP_FAILED) { + return NULL; + } + return p; +} + +static void +page_free(void *start, uint64_t len) +{ + assert(!(len & (CPU_PAGE_SIZE-1))); + assert(!((uintptr_t) start & (CPU_PAGE_SIZE-1))); + munmap(start, len); +} +#endif + +struct mempool_chunk { + struct mempool_chunk *next; + unsigned size; +}; + +static unsigned +mp_align_size(unsigned size) +{ +#ifdef CONFIG_UCW_POOL_IS_MMAP + return ALIGN_TO(size + MP_CHUNK_TAIL, CPU_PAGE_SIZE) - MP_CHUNK_TAIL; +#else + return ALIGN_TO(size, CPU_STRUCT_ALIGN); +#endif +} + +void +mp_init(struct mempool *pool, unsigned chunk_size) +{ + chunk_size = mp_align_size(MAX(sizeof(struct mempool), chunk_size)); + *pool = (struct mempool) { + .chunk_size = chunk_size, + .threshold = chunk_size >> 1, + .last_big = &pool->last_big + }; +} + +static void * +mp_new_big_chunk(unsigned size) +{ + uint8_t *data = malloc(size + MP_CHUNK_TAIL); + if (!data) { + return NULL; + } + ASAN_POISON_MEMORY_REGION(data, size); + struct mempool_chunk *chunk = (struct mempool_chunk *)(data + size); + chunk->size = size; + return chunk; +} + +static void +mp_free_big_chunk(struct mempool_chunk *chunk) +{ + void *ptr = (uint8_t *)chunk - chunk->size; + ASAN_UNPOISON_MEMORY_REGION(ptr, chunk->size); + free(ptr); +} + +static void * +mp_new_chunk(unsigned size) +{ +#ifdef CONFIG_UCW_POOL_IS_MMAP + uint8_t *data = page_alloc(size + MP_CHUNK_TAIL); + if (!data) { + return NULL; + } + ASAN_POISON_MEMORY_REGION(data, size); + struct mempool_chunk *chunk = (struct mempool_chunk *)(data + size); + chunk->size = size; + return chunk; +#else + return mp_new_big_chunk(size); +#endif +} + +static void +mp_free_chunk(struct mempool_chunk *chunk) +{ +#ifdef CONFIG_UCW_POOL_IS_MMAP + uint8_t *data = (uint8_t *)chunk - chunk->size; + ASAN_UNPOISON_MEMORY_REGION(data, chunk->size); + page_free(data, chunk->size + MP_CHUNK_TAIL); +#else + mp_free_big_chunk(chunk); +#endif +} + +struct mempool * +mp_new(unsigned chunk_size) +{ + chunk_size = mp_align_size(MAX(sizeof(struct mempool), chunk_size)); + struct mempool_chunk *chunk = mp_new_chunk(chunk_size); + struct mempool *pool = (void *)chunk - chunk_size; + ASAN_UNPOISON_MEMORY_REGION(pool, sizeof(*pool)); + DBG("Creating mempool %p with %u bytes long chunks", pool, chunk_size); + chunk->next = NULL; + ASAN_POISON_MEMORY_REGION(chunk, sizeof(struct mempool_chunk)); + *pool = (struct mempool) { + .state = { .free = { chunk_size - sizeof(*pool) }, .last = { chunk } }, + .chunk_size = chunk_size, + .threshold = chunk_size >> 1, + .last_big = &pool->last_big + }; + return pool; +} + +static void +mp_free_chain(struct mempool_chunk *chunk) +{ + while (chunk) { + ASAN_UNPOISON_MEMORY_REGION(chunk, sizeof(struct mempool_chunk)); + struct mempool_chunk *next = chunk->next; + mp_free_chunk(chunk); + chunk = next; + } +} + +static void +mp_free_big_chain(struct mempool_chunk *chunk) +{ + while (chunk) { + ASAN_UNPOISON_MEMORY_REGION(chunk, sizeof(struct mempool_chunk)); + struct mempool_chunk *next = chunk->next; + mp_free_big_chunk(chunk); + chunk = next; + } +} + +void +mp_delete(struct mempool *pool) +{ + if (pool == NULL) { + return; + } + DBG("Deleting mempool %p", pool); + mp_free_big_chain(pool->state.last[1]); + mp_free_chain(pool->unused); + mp_free_chain(pool->state.last[0]); // can contain the mempool structure +} + +void +mp_flush(struct mempool *pool) +{ + mp_free_big_chain(pool->state.last[1]); + struct mempool_chunk *chunk = pool->state.last[0], *next; + while (chunk) { + ASAN_UNPOISON_MEMORY_REGION(chunk, sizeof(struct mempool_chunk)); + if ((uint8_t *)chunk - chunk->size == (uint8_t *)pool) { + break; + } + next = chunk->next; + chunk->next = pool->unused; + ASAN_POISON_MEMORY_REGION(chunk, sizeof(struct mempool_chunk)); + pool->unused = chunk; + chunk = next; + } + pool->state.last[0] = chunk; + if (chunk) { + pool->state.free[0] = chunk->size - sizeof(*pool); + ASAN_POISON_MEMORY_REGION(chunk, sizeof(struct mempool_chunk)); + } else { + pool->state.free[0] = 0; + } + pool->state.last[1] = NULL; + pool->state.free[1] = 0; + pool->last_big = &pool->last_big; +} + +static void +mp_stats_chain(struct mempool_chunk *chunk, struct mempool_stats *stats, unsigned idx) +{ + struct mempool_chunk *next; + while (chunk) { + ASAN_UNPOISON_MEMORY_REGION(chunk, sizeof(struct mempool_chunk)); + stats->chain_size[idx] += chunk->size + sizeof(*chunk); + stats->chain_count[idx]++; + next = chunk->next; + ASAN_POISON_MEMORY_REGION(chunk, sizeof(struct mempool_chunk)); + chunk = next; + } + stats->total_size += stats->chain_size[idx]; +} + +void +mp_stats(struct mempool *pool, struct mempool_stats *stats) +{ + bzero(stats, sizeof(*stats)); + mp_stats_chain(pool->state.last[0], stats, 0); + mp_stats_chain(pool->state.last[1], stats, 1); + mp_stats_chain(pool->unused, stats, 2); +} + +uint64_t +mp_total_size(struct mempool *pool) +{ + struct mempool_stats stats; + mp_stats(pool, &stats); + return stats.total_size; +} + +static void * +mp_alloc_internal(struct mempool *pool, unsigned size) +{ + struct mempool_chunk *chunk; + if (size <= pool->threshold) { + pool->idx = 0; + if (pool->unused) { + chunk = pool->unused; + ASAN_UNPOISON_MEMORY_REGION(chunk, sizeof(struct mempool_chunk)); + pool->unused = chunk->next; + } else { + chunk = mp_new_chunk(pool->chunk_size); + } + chunk->next = pool->state.last[0]; + ASAN_POISON_MEMORY_REGION(chunk, sizeof(struct mempool_chunk)); + pool->state.last[0] = chunk; + pool->state.free[0] = pool->chunk_size - size; + return (uint8_t *)chunk - pool->chunk_size; + } else if (size <= MP_SIZE_MAX) { + pool->idx = 1; + unsigned aligned = ALIGN_TO(size, CPU_STRUCT_ALIGN); + chunk = mp_new_big_chunk(aligned); + if (!chunk) { + return NULL; + } + chunk->next = pool->state.last[1]; + ASAN_POISON_MEMORY_REGION(chunk, sizeof(struct mempool_chunk)); + pool->state.last[1] = chunk; + pool->state.free[1] = aligned - size; + return pool->last_big = (uint8_t *)chunk - aligned; + } else { + fprintf(stderr, "Cannot allocate %u bytes from a mempool", size); + assert(0); + return NULL; + } +} + +void * +mp_alloc(struct mempool *pool, unsigned size) +{ + unsigned avail = pool->state.free[0] & ~(CPU_STRUCT_ALIGN - 1); + void *ptr = NULL; + if (size <= avail) { + pool->state.free[0] = avail - size; + ptr = (uint8_t*)pool->state.last[0] - avail; + } else { + ptr = mp_alloc_internal(pool, size); + } + ASAN_UNPOISON_MEMORY_REGION(ptr, size); + return ptr; +} + +void * +mp_alloc_noalign(struct mempool *pool, unsigned size) +{ + void *ptr = NULL; + if (size <= pool->state.free[0]) { + ptr = (uint8_t*)pool->state.last[0] - pool->state.free[0]; + pool->state.free[0] -= size; + } else { + ptr = mp_alloc_internal(pool, size); + } + ASAN_UNPOISON_MEMORY_REGION(ptr, size); + return ptr; +} + +void * +mp_alloc_zero(struct mempool *pool, unsigned size) +{ + void *ptr = mp_alloc(pool, size); + bzero(ptr, size); + return ptr; +} diff --git a/src/contrib/ucw/mempool.h b/src/contrib/ucw/mempool.h new file mode 100644 index 0000000..c5a4fa8 --- /dev/null +++ b/src/contrib/ucw/mempool.h @@ -0,0 +1,124 @@ +/* + * UCW Library -- Memory Pools + * + * (c) 1997--2005 Martin Mares <mj@ucw.cz> + * (c) 2007 Pavel Charvat <pchar@ucw.cz> + * (c) 2015, 2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#pragma once + +#include <string.h> +#include <stdint.h> + +#define CPU_STRUCT_ALIGN (sizeof(void*)) + +/*** + * [[defs]] + * Definitions + * ----------- + ***/ + +/** + * Memory pool state (see mp_push(), ...). + * You should use this one as an opaque handle only, the insides are internal. + **/ +struct mempool_state { + unsigned free[2]; + void *last[2]; +}; + +/** + * Memory pool. + * You should use this one as an opaque handle only, the insides are internal. + **/ +struct mempool { + struct mempool_state state; + void *unused, *last_big; + unsigned chunk_size, threshold, idx; +}; + +struct mempool_stats { /** Mempool statistics. See mp_stats(). **/ + uint64_t total_size; /** Real allocated size in bytes. */ + unsigned chain_count[3]; /** Number of allocated chunks in small/big/unused chains. */ + unsigned chain_size[3]; /** Size of allocated chunks in small/big/unused chains. */ +}; + +/*** + * [[basic]] + * Basic manipulation + * ------------------ + ***/ + +/** + * Initialize a given mempool structure. + * \p chunk_size must be in the interval `[1, UINT_MAX / 2]`. + * It will allocate memory by this large chunks and take + * memory to satisfy requests from them. + * + * Memory pools can be treated as <<trans:respools,resources>>, see <<trans:res_mempool()>>. + **/ +void mp_init(struct mempool *pool, unsigned chunk_size); + +/** + * Allocate and initialize a new memory pool. + * See \ref mp_init() for \p chunk_size limitations. + * + * The new mempool structure is allocated on the new mempool. + * + * Memory pools can be treated as <<trans:respools,resources>>, see <<trans:res_mempool()>>. + **/ +struct mempool *mp_new(unsigned chunk_size); + +/** + * Cleanup mempool initialized by mp_init or mp_new. + * Frees all the memory allocated by this mempool and, + * if created by \ref mp_new(), the \p pool itself. + **/ +void mp_delete(struct mempool *pool); + +/** + * Frees all data on a memory pool, but leaves it working. + * It can keep some of the chunks allocated to serve + * further allocation requests. Leaves the \p pool alive, + * even if it was created with \ref mp_new(). + **/ +void mp_flush(struct mempool *pool); + +/** + * Compute some statistics for debug purposes. + * See the definition of the <<struct_mempool_stats,mempool_stats structure>>. + **/ +void mp_stats(struct mempool *pool, struct mempool_stats *stats); +uint64_t mp_total_size(struct mempool *pool); /** How many bytes were allocated by the pool. **/ + +/*** + * [[alloc]] + * Allocation routines + * ------------------- + ***/ + +/** + * The function allocates new \p size bytes on a given memory pool. + * If the \p size is zero, the resulting pointer is undefined, + * but it may be safely reallocated or used as the parameter + * to other functions below. + * + * The resulting pointer is always aligned to a multiple of + * `CPU_STRUCT_ALIGN` bytes and this condition remains true also + * after future reallocations. + **/ +void *mp_alloc(struct mempool *pool, unsigned size); + +/** + * The same as \ref mp_alloc(), but the result may be unaligned. + **/ +void *mp_alloc_noalign(struct mempool *pool, unsigned size); + +/** + * The same as \ref mp_alloc(), but fills the newly allocated memory with zeroes. + **/ +void *mp_alloc_zero(struct mempool *pool, unsigned size); diff --git a/src/contrib/url-parser/LICENSE b/src/contrib/url-parser/LICENSE new file mode 100644 index 0000000..0161703 --- /dev/null +++ b/src/contrib/url-parser/LICENSE @@ -0,0 +1 @@ +../licenses/MIT
\ No newline at end of file diff --git a/src/contrib/url-parser/README.md b/src/contrib/url-parser/README.md new file mode 100644 index 0000000..c3b1919 --- /dev/null +++ b/src/contrib/url-parser/README.md @@ -0,0 +1,14 @@ +url-parser +========== + +This is Joyent's [URL parser][original-source] from their +[http-parser][joyent-http-parser] library. + +## LICENSE + +This derivative is licensed under the same terms as NGINX and copyright Joyent, +Inc. and other Node contributors as outlined in their [LICENSE][]. + +[original-source]: https://github.com/joyent/http-parser/blob/e01811e7f4894d7f0f7f4bd8492cccec6f6b4038/http_parser.c#L2343 +[joyent-http-parser]: https://github.com/joyent/http-parser +[LICENSE]: https://github.com/joyent/http-parser/blob/master/LICENSE-MIT diff --git a/src/contrib/url-parser/url_parser.c b/src/contrib/url-parser/url_parser.c new file mode 100644 index 0000000..5084740 --- /dev/null +++ b/src/contrib/url-parser/url_parser.c @@ -0,0 +1,635 @@ +/* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev + * + * Additional changes are licensed under the same terms as NGINX and + * copyright Joyent, Inc. and other Node contributors. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * This is the http_parser_parse_url function extracted from joyent's + * http-parser library. + */ + +#include <assert.h> +#include <stdint.h> +#include <stdlib.h> + +#include "url_parser.h" + +/* Macros for character classes; depends on strict-mode */ +#define CR '\r' +#define LF '\n' +#define LOWER(c) (unsigned char)(c | 0x20) +#define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z') +#define IS_NUM(c) ((c) >= '0' && (c) <= '9') +#define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c)) +#define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f')) +#define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \ + (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \ + (c) == ')') +#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \ + (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \ + (c) == '$' || (c) == ',') + +#define STRICT_TOKEN(c) (tokens[(unsigned char)c]) + +#ifndef BIT_AT +# define BIT_AT(a, i) \ + (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \ + (1 << ((unsigned int) (i) & 7)))) +#endif + +#if HTTP_PARSER_STRICT +# define T(v) 0 +#else +# define T(v) v +#endif + +static const uint8_t normal_url_char[32] = { +/* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ + 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, +/* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ + 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0, +/* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ + 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, +/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ + 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, +/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */ + 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128, +/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, +/* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, }; + +#if HTTP_PARSER_STRICT +#define TOKEN(c) (tokens[(unsigned char)c]) +#define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c)) +#define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-') +#else +#define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c]) +#define IS_URL_CHAR(c) \ + (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80)) +#define IS_HOST_CHAR(c) \ + (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_') +#endif + + + +enum state + { s_dead = 1 /* important that this is > 0 */ + + , s_start_req_or_res + , s_res_or_resp_H + , s_start_res + , s_res_H + , s_res_HT + , s_res_HTT + , s_res_HTTP + , s_res_first_http_major + , s_res_http_major + , s_res_first_http_minor + , s_res_http_minor + , s_res_first_status_code + , s_res_status_code + , s_res_status_start + , s_res_status + , s_res_line_almost_done + + , s_start_req + + , s_req_method + , s_req_spaces_before_url + , s_req_schema + , s_req_schema_slash + , s_req_schema_slash_slash + , s_req_server_start + , s_req_server + , s_req_server_with_at + , s_req_path + , s_req_query_string_start + , s_req_query_string + , s_req_fragment_start + , s_req_fragment + , s_req_http_start + , s_req_http_H + , s_req_http_HT + , s_req_http_HTT + , s_req_http_HTTP + , s_req_first_http_major + , s_req_http_major + , s_req_first_http_minor + , s_req_http_minor + , s_req_line_almost_done + + , s_header_field_start + , s_header_field + , s_header_value_discard_ws + , s_header_value_discard_ws_almost_done + , s_header_value_discard_lws + , s_header_value_start + , s_header_value + , s_header_value_lws + + , s_header_almost_done + + , s_chunk_size_start + , s_chunk_size + , s_chunk_parameters + , s_chunk_size_almost_done + + , s_headers_almost_done + , s_headers_done + + /* Important: 's_headers_done' must be the last 'header' state. All + * states beyond this must be 'body' states. It is used for overflow + * checking. See the PARSING_HEADER() macro. + */ + + , s_chunk_data + , s_chunk_data_almost_done + , s_chunk_data_done + + , s_body_identity + , s_body_identity_eof + + , s_message_done + }; + + +enum http_host_state + { + s_http_host_dead = 1 + , s_http_userinfo_start + , s_http_userinfo + , s_http_host_start + , s_http_host_v6_start + , s_http_host + , s_http_host_v6 + , s_http_host_v6_end + , s_http_host_v6_zone_start + , s_http_host_v6_zone + , s_http_host_port_start + , s_http_host_port +}; + +/* Our URL parser. + * + * This is designed to be shared by http_parser_execute() for URL validation, + * hence it has a state transition + byte-for-byte interface. In addition, it + * is meant to be embedded in http_parser_parse_url(), which does the dirty + * work of turning state transitions URL components for its API. + * + * This function should only be invoked with non-space characters. It is + * assumed that the caller cares about (and can detect) the transition between + * URL and non-URL states by looking for these. + */ +static enum state +parse_url_char(enum state s, const char ch) +{ + if (ch == ' ' || ch == '\r' || ch == '\n') { + return s_dead; + } + +#if HTTP_PARSER_STRICT + if (ch == '\t' || ch == '\f') { + return s_dead; + } +#endif + + switch (s) { + case s_req_spaces_before_url: + /* Proxied requests are followed by scheme of an absolute URI (alpha). + * All methods except CONNECT are followed by '/' or '*'. + */ + + if (ch == '/' || ch == '*') { + return s_req_path; + } + + if (IS_ALPHA(ch)) { + return s_req_schema; + } + + break; + + case s_req_schema: + if (IS_ALPHA(ch)) { + return s; + } + + if (ch == ':') { + return s_req_schema_slash; + } + + break; + + case s_req_schema_slash: + if (ch == '/') { + return s_req_schema_slash_slash; + } + + break; + + case s_req_schema_slash_slash: + if (ch == '/') { + return s_req_server_start; + } + + break; + + case s_req_server_with_at: + if (ch == '@') { + return s_dead; + } + + /* FALLTHROUGH */ + case s_req_server_start: + case s_req_server: + if (ch == '/') { + return s_req_path; + } + + if (ch == '?') { + return s_req_query_string_start; + } + + if (ch == '@') { + return s_req_server_with_at; + } + + if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') { + return s_req_server; + } + + break; + + case s_req_path: + if (IS_URL_CHAR(ch)) { + return s; + } + + switch (ch) { + case '?': + return s_req_query_string_start; + + case '#': + return s_req_fragment_start; + } + + break; + + case s_req_query_string_start: + case s_req_query_string: + if (IS_URL_CHAR(ch)) { + return s_req_query_string; + } + + switch (ch) { + case '?': + /* allow extra '?' in query string */ + return s_req_query_string; + + case '#': + return s_req_fragment_start; + } + + break; + + case s_req_fragment_start: + if (IS_URL_CHAR(ch)) { + return s_req_fragment; + } + + switch (ch) { + case '?': + return s_req_fragment; + + case '#': + return s; + } + + break; + + case s_req_fragment: + if (IS_URL_CHAR(ch)) { + return s; + } + + switch (ch) { + case '?': + case '#': + return s; + } + + break; + + default: + break; + } + + /* We should never fall out of the switch above unless there's an error */ + return s_dead; +} + +static enum http_host_state +http_parse_host_char(enum http_host_state s, const char ch) { + switch(s) { + case s_http_userinfo: + case s_http_userinfo_start: + if (ch == '@') { + return s_http_host_start; + } + + if (IS_USERINFO_CHAR(ch)) { + return s_http_userinfo; + } + break; + + case s_http_host_start: + if (ch == '[') { + return s_http_host_v6_start; + } + + if (IS_HOST_CHAR(ch)) { + return s_http_host; + } + + break; + + case s_http_host: + if (IS_HOST_CHAR(ch)) { + return s_http_host; + } + + /* FALLTHROUGH */ + case s_http_host_v6_end: + if (ch == ':') { + return s_http_host_port_start; + } + + break; + + case s_http_host_v6: + if (ch == ']') { + return s_http_host_v6_end; + } + + /* FALLTHROUGH */ + case s_http_host_v6_start: + if (IS_HEX(ch) || ch == ':' || ch == '.') { + return s_http_host_v6; + } + + if (s == s_http_host_v6 && ch == '%') { + return s_http_host_v6_zone_start; + } + break; + + case s_http_host_v6_zone: + if (ch == ']') { + return s_http_host_v6_end; + } + + /* FALLTHROUGH */ + case s_http_host_v6_zone_start: + /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */ + if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' || + ch == '~') { + return s_http_host_v6_zone; + } + break; + + case s_http_host_port: + case s_http_host_port_start: + if (IS_NUM(ch)) { + return s_http_host_port; + } + + break; + + default: + break; + } + return s_http_host_dead; +} + +static int +http_parse_host(const char * buf, struct http_parser_url *u, int found_at) { + assert(u->field_set & (1 << UF_HOST)); + enum http_host_state s; + + const char *p; + size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len; + + u->field_data[UF_HOST].len = 0; + + s = found_at ? s_http_userinfo_start : s_http_host_start; + + for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) { + enum http_host_state new_s = http_parse_host_char(s, *p); + + if (new_s == s_http_host_dead) { + return 1; + } + + switch(new_s) { + case s_http_host: + if (s != s_http_host) { + u->field_data[UF_HOST].off = p - buf; + } + u->field_data[UF_HOST].len++; + break; + + case s_http_host_v6: + if (s != s_http_host_v6) { + u->field_data[UF_HOST].off = p - buf; + } + u->field_data[UF_HOST].len++; + break; + + case s_http_host_v6_zone_start: + case s_http_host_v6_zone: + u->field_data[UF_HOST].len++; + break; + + case s_http_host_port: + if (s != s_http_host_port) { + u->field_data[UF_PORT].off = p - buf; + u->field_data[UF_PORT].len = 0; + u->field_set |= (1 << UF_PORT); + } + u->field_data[UF_PORT].len++; + break; + + case s_http_userinfo: + if (s != s_http_userinfo) { + u->field_data[UF_USERINFO].off = p - buf ; + u->field_data[UF_USERINFO].len = 0; + u->field_set |= (1 << UF_USERINFO); + } + u->field_data[UF_USERINFO].len++; + break; + + default: + break; + } + s = new_s; + } + + /* Make sure we don't end somewhere unexpected */ + switch (s) { + case s_http_host_start: + case s_http_host_v6_start: + case s_http_host_v6: + case s_http_host_v6_zone_start: + case s_http_host_v6_zone: + case s_http_host_port_start: + case s_http_userinfo: + case s_http_userinfo_start: + return 1; + default: + break; + } + + return 0; +} + + +int +http_parser_parse_url(const char *buf, size_t buflen, int is_connect, + struct http_parser_url *u) +{ + enum state s; + const char *p; + enum http_parser_url_fields uf, old_uf; + int found_at = 0; + + u->port = u->field_set = 0; + s = is_connect ? s_req_server_start : s_req_spaces_before_url; + old_uf = UF_MAX; + + for (p = buf; p < buf + buflen; p++) { + s = parse_url_char(s, *p); + + /* Figure out the next field that we're operating on */ + switch (s) { + case s_dead: + return 1; + + /* Skip delimiters */ + case s_req_schema_slash: + case s_req_schema_slash_slash: + case s_req_server_start: + case s_req_query_string_start: + case s_req_fragment_start: + continue; + + case s_req_schema: + uf = UF_SCHEMA; + break; + + case s_req_server_with_at: + found_at = 1; + + /* FALLTHROUGH */ + case s_req_server: + uf = UF_HOST; + break; + + case s_req_path: + uf = UF_PATH; + break; + + case s_req_query_string: + uf = UF_QUERY; + break; + + case s_req_fragment: + uf = UF_FRAGMENT; + break; + + default: + assert(!"Unexpected state"); + return 1; + } + + /* Nothing's changed; soldier on */ + if (uf == old_uf) { + u->field_data[uf].len++; + continue; + } + + u->field_data[uf].off = p - buf; + u->field_data[uf].len = 1; + + u->field_set |= (1 << uf); + old_uf = uf; + } + + /* host must be present if there is a schema */ + /* parsing http:///toto will fail */ + if ((u->field_set & (1 << UF_SCHEMA)) && + (u->field_set & (1 << UF_HOST)) == 0) { + return 1; + } + + if (u->field_set & (1 << UF_HOST)) { + if (http_parse_host(buf, u, found_at) != 0) { + return 1; + } + } + + /* CONNECT requests can only contain "hostname:port" */ + if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) { + return 1; + } + + if (u->field_set & (1 << UF_PORT)) { + /* Don't bother with endp; we've already validated the string */ + unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10); + + /* Ports have a max value of 2^16 */ + if (v > 0xffff) { + return 1; + } + + u->port = (uint16_t) v; + } + + return 0; +} diff --git a/src/contrib/url-parser/url_parser.h b/src/contrib/url-parser/url_parser.h new file mode 100644 index 0000000..0f916e0 --- /dev/null +++ b/src/contrib/url-parser/url_parser.h @@ -0,0 +1,64 @@ +/* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev + * + * Additional changes are licensed under the same terms as NGINX and + * copyright Joyent, Inc. and other Node contributors. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * This is the http_parser_parse_url function extracted from joyent's + * http-parser library. + */ + +#pragma once + +#include <stdint.h> + +enum http_parser_url_fields + { UF_SCHEMA = 0 + , UF_HOST = 1 + , UF_PORT = 2 + , UF_PATH = 3 + , UF_QUERY = 4 + , UF_FRAGMENT = 5 + , UF_USERINFO = 6 + , UF_MAX = 7 + }; + +/* Result structure for http_parser_parse_url(). + * + * Callers should index into field_data[] with UF_* values iff field_set + * has the relevant (1 << UF_*) bit set. As a courtesy to clients (and + * because we probably have padding left over), we convert any port to + * a uint16_t. + */ +struct http_parser_url { + uint16_t field_set; /* Bitmask of (1 << UF_*) values */ + uint16_t port; /* Converted UF_PORT string */ + + struct { + uint16_t off; /* Offset into buffer in which field starts */ + uint16_t len; /* Length of run in buffer */ + } field_data[UF_MAX]; +}; + +int +http_parser_parse_url(const char *buf, size_t buflen, int is_connect, + struct http_parser_url *u); diff --git a/src/contrib/vpool/vpool.c b/src/contrib/vpool/vpool.c new file mode 100644 index 0000000..f130a47 --- /dev/null +++ b/src/contrib/vpool/vpool.c @@ -0,0 +1,243 @@ +/* + * Copyright (c) 2006, 2008 Alexey Vatchenko <av@bsdua.org> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/types.h> + +#include <errno.h> +#include <limits.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "contrib/vpool/vpool.h" + +static void vpool_shift(struct vpool *pool); +static int vpool_new_size(struct vpool *pool, size_t datsize, + size_t *size); +static int vpool_resize(struct vpool *pool, size_t datsize); + +static void +vpool_shift(struct vpool *pool) +{ + if (pool->v_buf != pool->v_basebuf) { + memmove(pool->v_basebuf, pool->v_buf, pool->v_off); + pool->v_buf = pool->v_basebuf; + } +} + +static int +vpool_new_size(struct vpool *pool, size_t datsize, size_t *size) +{ + size_t need; + size_t rem; + + if (datsize <= pool->v_size - pool->v_off) { + *size = pool->v_size; + return (0); + } + + /* Check limit of new requested size */ + if (pool->v_limit - pool->v_off < datsize) { + return (EFBIG); + } + need = pool->v_off + datsize; + + /* Check limit of new size aligned to block size */ + rem = need % pool->v_blksize; + if (rem != 0) { + if (pool->v_limit - pool->v_off >= + datsize + (pool->v_blksize - rem)) { + need += pool->v_blksize - rem; + } else { + need = pool->v_limit; + } + } + + *size = need; + return (0); +} + +static int +vpool_resize(struct vpool *pool, size_t datsize) +{ + void *ret; + size_t size; + int error; + + error = vpool_new_size(pool, datsize, &size); + if (error != 0) { + return (error); + } + + if (size > pool->v_size) { + ret = malloc(size); + if (ret == NULL) { + return (ENOMEM); + } + + if (pool->v_off > 0) { + memcpy(ret, pool->v_buf, pool->v_off); + } + free(pool->v_basebuf); + pool->v_basebuf = pool->v_buf = ret; + pool->v_size = size; + } else if ((pool->v_size - pool->v_off) - + (pool->v_buf - pool->v_basebuf) < datsize) { + vpool_shift(pool); + } + + return (0); +} + +void +vpool_init(struct vpool *pool, size_t blksize, size_t limit) +{ + + pool->v_basebuf = pool->v_buf = NULL; + pool->v_off = pool->v_size = 0; + + pool->v_blksize = (blksize == 0) ? 4096 : blksize; /* XXX */ + pool->v_limit = (limit == 0) ? SIZE_MAX : limit; + + pool->v_lasterr = 0; +} + +void +vpool_final(struct vpool *pool) +{ + free(pool->v_basebuf); +} + +void +vpool_reset(struct vpool *pool) +{ + free(pool->v_basebuf); + pool->v_basebuf = pool->v_buf = NULL; + pool->v_off = pool->v_size = 0; + pool->v_lasterr = 0; +} + +void +vpool_wipe(struct vpool *pool) +{ + pool->v_off = 0; + pool->v_lasterr = 0; +} + +void * +vpool_insert(struct vpool *pool, size_t where, void *data, size_t datsize) +{ + void *ret; + int error; + + error = vpool_resize(pool, datsize); + if (error != 0) { + pool->v_lasterr = error; + return (NULL); + } + + /* + * If ``where'' is greater than or equal to offset then + * we are appending data to the end of the buffer. + */ + if (where > pool->v_off) { + where = pool->v_off; + } + + ret = (uint8_t *)pool->v_buf + where; + if (pool->v_off - where > 0) { + memmove(ret + datsize, ret, pool->v_off - where); + } + memcpy(ret, data, datsize); + pool->v_off += datsize; + pool->v_lasterr = 0; + + return (ret); +} + +void * +vpool_expand(struct vpool *pool, size_t where, size_t size) +{ + void *ret; + int error; + + error = vpool_resize(pool, size); + if (error != 0) { + pool->v_lasterr = error; + return (NULL); + } + + /* + * If ``where'' is greater than or equal to offset then + * we are appending data to the end of the buffer. + */ + if (where > pool->v_off) { + where = pool->v_off; + } + + ret = (uint8_t *)pool->v_buf + where; + if (pool->v_off - where > 0) { + memmove(ret + size, ret, pool->v_off - where); + } + pool->v_off += size; + pool->v_lasterr = 0; + + return (ret); +} + +int +vpool_truncate(struct vpool *pool, + size_t where, size_t size, enum vpool_trunc how) +{ + /* Check if caller wants to remove more data than we have */ + if (where >= pool->v_off || + size > pool->v_off || pool->v_off - size < where) { + pool->v_lasterr = ERANGE; + return (pool->v_lasterr); + } + + if (how == VPOOL_EXCLUDE) { + if (where == 0) { + /* + * Optimization. + * Don't move data, just adjust pointer. + */ + pool->v_buf = (uint8_t *)pool->v_buf + size; + } else { + memmove((uint8_t *)pool->v_buf + where, + (uint8_t *)pool->v_buf + where + size, + pool->v_off - size - where); + } + pool->v_off -= size; + } else { + pool->v_buf = (uint8_t *)pool->v_buf + where; + pool->v_off = size; + } + + pool->v_lasterr = 0; + return (0); +} + +void +vpool_export(struct vpool *pool, void **buf, size_t *size) +{ + vpool_shift(pool); + *buf = pool->v_buf; + *size = pool->v_off; + pool->v_basebuf = pool->v_buf = NULL; + pool->v_off = pool->v_size = 0; + pool->v_lasterr = 0; +} diff --git a/src/contrib/vpool/vpool.h b/src/contrib/vpool/vpool.h new file mode 100644 index 0000000..82e3d66 --- /dev/null +++ b/src/contrib/vpool/vpool.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2006, 2008 Alexey Vatchenko <av@bsdua.org> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * VPool: implementation of pool of data with a variable size. + */ +#ifndef _VPOOL_H_ +#define _VPOOL_H_ + +#include <stddef.h> +#include <limits.h> + +struct vpool { + void *v_basebuf; /* pointer returned by (re|m)alloc() */ + void *v_buf; /* actual data starts here */ + size_t v_off; + size_t v_size; + + size_t v_blksize; + size_t v_limit; + int v_lasterr; +}; + +enum vpool_trunc {VPOOL_EXCLUDE, VPOOL_INCLUDE}; +#define VPOOL_TAIL UINT_MAX + +void vpool_init(struct vpool *pool, size_t blksize, size_t limit); +void vpool_final(struct vpool *pool); + +void vpool_reset(struct vpool *pool); +void vpool_wipe(struct vpool *pool); + +void * vpool_insert(struct vpool *pool, + size_t where, void *data, size_t datsize); +void * vpool_expand(struct vpool *pool, size_t where, size_t size); + +int vpool_truncate(struct vpool *pool, + size_t where, size_t size, enum vpool_trunc how); + +#define vpool_is_empty(pool) ((pool)->v_off == 0) +#define vpool_get_buf(pool) ((pool)->v_buf) +#define vpool_get_length(pool) ((pool)->v_off) +#define vpool_get_error(pool) ((pool)->v_lasterr) + +void vpool_export(struct vpool *pool, void **buf, size_t *size); + +#endif /* !_VPOOL_H_ */ diff --git a/src/contrib/wire_ctx.h b/src/contrib/wire_ctx.h new file mode 100644 index 0000000..f4f1789 --- /dev/null +++ b/src/contrib/wire_ctx.h @@ -0,0 +1,355 @@ +/* Copyright (C) 2019 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#pragma once + +#include <assert.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> +#include <sys/types.h> + +#include "contrib/string.h" +#include "libknot/endian.h" +#include "libknot/errcode.h" + +/*! + * \brief Struct to keep the wire context. + */ +typedef struct wire_ctx { + size_t size; + uint8_t *wire; + uint8_t *position; + int error; + bool readonly; +} wire_ctx_t; + +/*! + * \brief Initialize wire context. + */ +static inline wire_ctx_t wire_ctx_init(uint8_t *data, size_t size) +{ + assert(data); + + wire_ctx_t result = { + .size = size, + .wire = data, + .position = data, + .error = KNOT_EOK, + .readonly = false + }; + + return result; +} + +/*! + * \brief Initialize read only wire context. + * + * \note No write is performed, and error is set to KNOT_EACCES. + * + */ +static inline wire_ctx_t wire_ctx_init_const(const uint8_t *data, size_t size) +{ + assert(data); + + wire_ctx_t result = wire_ctx_init((uint8_t *)data, size); + result.readonly = true; + + return result; +} + +/*! + * \brief Gets actual position. + * + * \return position from the begin. + */ +static inline size_t wire_ctx_offset(wire_ctx_t *ctx) +{ + assert(ctx); + + return ctx->position - ctx->wire; +} + +/*! + * \brief Set position offset from the begin. + * + * \param offset Wire offset (starts from 0). + * + * \note Noop if previous error. + */ +static inline void wire_ctx_set_offset(wire_ctx_t *ctx, size_t offset) +{ + assert(ctx); + + if (ctx->error != KNOT_EOK) { + return; + } + + if (offset > ctx->size) { + ctx->error = KNOT_ERANGE; + return; + } + + ctx->position = ctx->wire + offset; +} + +/*! + * \brief Gets available bytes. + * + * \return Number of bytes to end. + */ +static inline size_t wire_ctx_available(wire_ctx_t *ctx) +{ + assert(ctx); + + return ctx->size - wire_ctx_offset(ctx); +} + +/*! + * \brief Add offset to the current position. + * + * \note Noop if previous error. + */ +static inline void wire_ctx_skip(wire_ctx_t *ctx, ssize_t offset) +{ + assert(ctx); + + if (ctx->error != KNOT_EOK) { + return; + } + + // Check for out of scope skip. + if (offset >= 0) { + if (offset > wire_ctx_available(ctx)) { + ctx->error = KNOT_ERANGE; + return; + } + } else { + if (-offset > wire_ctx_offset(ctx)) { + ctx->error = KNOT_ERANGE; + return; + } + } + + ctx->position += offset; +} + +/*! + * \brief Check the context if reading is possible. + */ +static inline int wire_ctx_can_read(wire_ctx_t *ctx, size_t size) +{ + assert(ctx); + + if (ctx->error != KNOT_EOK) { + return ctx->error; + } + + if (wire_ctx_available(ctx) < size) { + return KNOT_EFEWDATA; + } + + return KNOT_EOK; +} + +/*! + * \brief Check the context if writing is possible. + */ +static inline int wire_ctx_can_write(wire_ctx_t *ctx, size_t size) +{ + assert(ctx); + + if (ctx->error != KNOT_EOK) { + return ctx->error; + } + + if (ctx->readonly) { + return KNOT_EACCES; + } + + if (wire_ctx_available(ctx) < size) { + return KNOT_ESPACE; + } + + return KNOT_EOK; +} + + +static inline void wire_ctx_read(wire_ctx_t *ctx, void *data, size_t size) +{ + assert(ctx); + assert(data); + + if (ctx->error != KNOT_EOK) { + /* Avoid leaving data uninitialized. */ + memzero(data, size); + return; + } + + int ret = wire_ctx_can_read(ctx, size); + if (ret != KNOT_EOK) { + ctx->error = ret; + memzero(data, size); + return; + } + + memcpy(data, ctx->position, size); + ctx->position += size; +} + +static inline uint8_t wire_ctx_read_u8(wire_ctx_t *ctx) +{ + uint8_t result; + wire_ctx_read(ctx, &result, sizeof(result)); + + return result; +} + +static inline uint16_t wire_ctx_read_u16(wire_ctx_t *ctx) +{ + uint16_t result; + wire_ctx_read(ctx, &result, sizeof(result)); + + return be16toh(result); +} + +static inline uint32_t wire_ctx_read_u32(wire_ctx_t *ctx) +{ + uint32_t result; + wire_ctx_read(ctx, &result, sizeof(result)); + + return be32toh(result); +} + +static inline uint64_t wire_ctx_read_u48(wire_ctx_t *ctx) +{ + /* This case is slightly tricky. */ + uint64_t result = 0; + wire_ctx_read(ctx, (uint8_t *)&result + 1, 6); + + return be64toh(result) >> 8; +} + +static inline uint64_t wire_ctx_read_u64(wire_ctx_t *ctx) +{ + uint64_t result; + wire_ctx_read(ctx, &result, sizeof(result)); + + return be64toh(result); +} + + +static inline void wire_ctx_write(wire_ctx_t *ctx, const void *data, size_t size) +{ + assert(ctx); + + if (ctx->error != KNOT_EOK) { + return; + } + + if (size == 0) { + return; + } + + assert(data); + + int ret = wire_ctx_can_write(ctx, size); + if (ret != KNOT_EOK) { + ctx->error = ret; + return; + } + + memcpy(ctx->position, data, size); + ctx->position += size; +} + +static inline void wire_ctx_write_u8(wire_ctx_t *ctx, uint8_t value) +{ + wire_ctx_write(ctx, &value, sizeof(value)); +} + +static inline void wire_ctx_write_u16(wire_ctx_t *ctx, uint16_t value) +{ + uint16_t beval = htobe16(value); + wire_ctx_write(ctx, &beval, sizeof(beval)); +} + +static inline void wire_ctx_write_u32(wire_ctx_t *ctx, uint32_t value) +{ + uint32_t beval = htobe32(value); + wire_ctx_write(ctx, &beval, sizeof(beval)); +} + +static inline void wire_ctx_write_u48(wire_ctx_t *ctx, uint64_t value) +{ + /* This case is slightly tricky. */ + uint64_t swapped = htobe64(value << 8); + wire_ctx_write(ctx, (uint8_t *)&swapped + 1, 6); +} + +static inline void wire_ctx_write_u64(wire_ctx_t *ctx, uint64_t value) +{ + uint64_t beval = htobe64(value); + wire_ctx_write(ctx, &beval, sizeof(beval)); +} + +static inline void wire_ctx_clear(wire_ctx_t *ctx, size_t size) +{ + assert(ctx); + + if (ctx->error != KNOT_EOK) { + return; + } + + if (size == 0) { + return; + } + + int ret = wire_ctx_can_write(ctx, size); + if (ret != KNOT_EOK) { + ctx->error = ret; + return; + } + + memzero(ctx->position, size); + ctx->position += size; +} + +static inline void wire_ctx_copy(wire_ctx_t *dst, wire_ctx_t *src, size_t size) +{ + assert(dst); + assert(src); + + if (size == 0 || dst->error != KNOT_EOK) { + return; + } + + if (wire_ctx_can_read(src, size) != KNOT_EOK) { + dst->error = KNOT_EFEWDATA; + return; + } + + int ret = wire_ctx_can_write(dst, size); + if (ret != KNOT_EOK) { + dst->error = ret; + return; + } + + memcpy(dst->position, src->position, size); + dst->position += size; + src->position += size; +} |